from functools import partial

from nlu.components.assertions.assertion_dl.assertion_dl import AssertionDL
from nlu.components.assertions.assertion_log_reg.assertion_log_reg import AssertionLogReg
from nlu.components.assertions.few_shot_assertion_classifier.few_shot_assertion_classifier import FewShotAssertionClassifierModel
from nlu.components.chunkers.chunk_mapper.chunk_mapper import ChunkMapper
from nlu.components.chunkers.contextual_parser.contextual_parser import ContextualParser
from nlu.components.chunkers.default_chunker.default_chunker import DefaultChunker
from nlu.components.chunkers.ngram.ngram import NGram
from nlu.components.classifiers.asr.wav2Vec import Wav2Vec
from nlu.components.classifiers.asr_hubert.hubert import Hubert
from nlu.components.classifiers.asr_whisper.whisper import Whisper
from nlu.components.classifiers.xlm_roberta_zero_shot_classification.xlm_roberta_zero_shot import XlmRobertaZeroShotClassifier
from nlu.components.classifiers.bert_zero_shot_classification.bert_zero_shot import BertZeroShotClassifier
from nlu.components.classifiers.bart_zero_shot_classification.bart_zero_shot import BartZeroShotClassifier
from nlu.components.classifiers.classifier_dl.classifier_dl import ClassifierDl
from nlu.components.classifiers.distil_bert_zero_shot_classification.distil_bert_zero_shot import \
    DistilBertZeroShotClassifier

from nlu.components.classifiers.deberta_zero_shot.deberta_zero_shot import DeBertaZeroShotClassifier


from nlu.components.classifiers.generic_classifier.generic_classifier import GenericClassifier
from nlu.components.classifiers.image_classification_swin.swin import SwinImageClassifier
from nlu.components.classifiers.image_classification_vit.convnext_image_classification import ConvNextImageClassifier
from nlu.components.classifiers.image_classification_vit.vit_image_classifier import VitImageClassifier
from nlu.components.classifiers.language_detector.language_detector import LanguageDetector
from nlu.components.classifiers.multi_classifier.multi_classifier import MultiClassifier
from nlu.components.classifiers.named_entity_recognizer_crf.ner_crf import NERDLCRF
from nlu.components.classifiers.ner.ner_dl import NERDL
from nlu.components.classifiers.ner_healthcare.ner_dl_healthcare import NERDLHealthcare
from nlu.components.classifiers.ner_zero_shot.ner_zero_shot import ZeroShotNer
from nlu.components.classifiers.pos.part_of_speech_jsl import PartOfSpeechJsl
from nlu.components.classifiers.sentiment_detector.sentiment_detector import Sentiment
from nlu.components.classifiers.sentiment_dl.sentiment_dl import SentimentDl
from nlu.components.classifiers.seq_albert.seq_albert import SeqAlbertClassifier
from nlu.components.classifiers.seq_bert.seq_bert_classifier import SeqBertClassifier
from nlu.components.classifiers.seq_bert_medical.seq_bert_medical_classifier import SeqBertMedicalClassifier
from nlu.components.classifiers.seq_camembert.seq_camembert import SeqCamembertClassifier
from nlu.components.classifiers.seq_deberta.seq_deberta_classifier import SeqDebertaClassifier
from nlu.components.classifiers.seq_distilbert.seq_distilbert_classifier import SeqDilstilBertClassifier
from nlu.components.classifiers.seq_distilbert_medical.seq_distilbert_medical_classifier import \
    SeqDilstilBertMedicalClassifier
from nlu.components.classifiers.seq_longformer.seq_longformer import SeqLongformerClassifier
from nlu.components.classifiers.seq_roberta.seq_roberta import SeqRobertaClassifier
from nlu.components.classifiers.seq_xlm_roberta.seq_xlm_roberta import SeqXlmRobertaClassifier
from nlu.components.classifiers.seq_xlnet.seq_xlnet import SeqXlnetClassifier
from nlu.components.classifiers.seq_mpnet.seq_mpnet import SeqMPNetClassifier 
from nlu.components.classifiers.span_bert.span_bert import SpanBertClassifier
from nlu.components.classifiers.span_albert.span_albert import SpanAlbertClassifier
from nlu.components.classifiers.span_camembert.span_camembert import SpanCamemBert
from nlu.components.classifiers.span_deberta.span_deberta import SpanDeBertaClassifier
from nlu.components.classifiers.span_distilbert.span_distilbert import SpanDistilBertClassifier
from nlu.components.classifiers.span_longformer.span_longformer import SpanLongFormerClassifier
from nlu.components.classifiers.span_roberta.span_roberta import SpanRobertaClassifier
from nlu.components.classifiers.span_xlm_roberta.span_xlm_roberta import SpanXlmRobertaClassifier
from nlu.components.classifiers.span_medical.span_medical import SpanMedical
from nlu.components.classifiers.token_albert.token_albert import TokenAlbert
from nlu.components.classifiers.token_bert.token_bert import TokenBert
from nlu.components.classifiers.token_bert_healthcare.token_bert_healthcare import TokenBertHealthcare
from nlu.components.classifiers.token_camembert.token_camembert import TokenCamembert
from nlu.components.classifiers.token_deberta.token_deberta import TokenDeBerta
from nlu.components.classifiers.token_distilbert.token_distilbert import TokenDistilBert
from nlu.components.classifiers.token_longformer.token_longformer import TokenLongFormer
from nlu.components.classifiers.token_roberta.token_roberta import TokenRoBerta
from nlu.components.classifiers.token_xlm_roberta.token_xlmroberta import TokenXlmRoBerta
from nlu.components.classifiers.token_xlnet.token_xlnet import TokenXlnet
from nlu.components.classifiers.vivekn_sentiment.vivekn_sentiment_detector import ViveknSentiment
from nlu.components.classifiers.yake.yake import Yake
from nlu.components.coref.coref_bert.coref_bert import CorefBert
from nlu.components.deidentifiers.deidentifier.deidentifier import Deidentifier
from nlu.components.dependency_typeds.labeled_dependency_parser.labeled_dependency_parser import \
    LabeledDependencyParser
from nlu.components.dependency_untypeds.unlabeled_dependency_parser.unlabeled_dependency_parser import \
    UnlabeledDependencyParser
from nlu.components.embeddings.albert.spark_nlp_albert import SparkNLPAlbert
from nlu.components.embeddings.bert.spark_nlp_bert import SparkNLPBert
from nlu.components.embeddings.bert_sentence_chunk.bert_sentence_chunk import BertSentenceChunkEmbeds
from nlu.components.embeddings.camenbert.camenbert import CamemBert
from nlu.components.embeddings.deberta.deberta import Deberta
from nlu.components.embeddings.distil_bert.distilbert import DistilBert
from nlu.components.embeddings.doc2vec.doc2vec import Doc2Vec
from nlu.components.embeddings.elmo.spark_nlp_elmo import SparkNLPElmo
from nlu.components.embeddings.glove.glove import Glove
from nlu.components.embeddings.longformer.longformer import Longformer
from nlu.components.embeddings.roberta.roberta import Roberta
from nlu.components.embeddings.sentence_e5.E5SentenceEmbedding import E5
from nlu.components.embeddings.sentence_bge.BGESentenceEmbedding import BGE
from nlu.components.embeddings.sentence_bert.BertSentenceEmbedding import BertSentence
from nlu.components.embeddings.sentence_roberta.RobertaSentenceEmbedding import RobertaSentence
from nlu.components.embeddings.sentence_mpnet.MPNetSentenceEmbedding import MPNetSentence
from nlu.components.embeddings.instructor_sentence.InstructorEmbeddings import Instructor
from nlu.components.embeddings.sentence_xlm.sentence_xlm import Sentence_XLM
from nlu.components.embeddings.use.spark_nlp_use import SparkNLPUse
from nlu.components.embeddings.word2vec.word2vec import Word2Vec
from nlu.components.embeddings.xlm.xlm import XLM
from nlu.components.embeddings.xlnet.spark_nlp_xlnet import SparkNLPXlnet
from nlu.components.embeddings_chunks.chunk_embedder.chunk_embedder import ChunkEmbedder
from nlu.components.lemmatizers.lemmatizer.spark_nlp_lemmatizer import SparkNLPLemmatizer
from nlu.components.matchers.regex_matcher.regex_matcher import RegexMatcher
from nlu.components.normalizers.document_normalizer.spark_nlp_document_normalizer import SparkNLPDocumentNormalizer
from nlu.components.normalizers.drug_normalizer.drug_normalizer import DrugNorm
from nlu.components.normalizers.normalizer.spark_nlp_normalizer import SparkNLPNormalizer
from nlu.components.relation_extractors.relation_extractor.relation_extractor import RelationExtraction
from nlu.components.relation_extractors.relation_extractor_dl.relation_extractor_dl import RelationExtractionDL
from nlu.components.relation_extractors.zero_shot_relation_extractor.zero_shot_relation_extractor import \
    ZeroShotRelationExtractor
from nlu.components.resolutions.sentence_entity_resolver.sentence_resolver import SentenceResolver
from nlu.components.sentence_detectors.deep_sentence_detector.deep_sentence_detector import SentenceDetectorDeep
from nlu.components.sentence_detectors.pragmatic_sentence_detector.sentence_detector import PragmaticSentenceDetector
from nlu.components.seq2seqs.bart_transformer.bart_transformer import SparkNLPBartTransformer
from nlu.components.seq2seqs.m2m100_transformer.m2m100_transformer import M2M100
from nlu.components.seq2seqs.gpt2.gpt2 import GPT2
from nlu.components.seq2seqs.openai_completion.openai_completion import OpenaiCompletion
from nlu.components.embeddings.openai_embeddings.openai_embeddings import OpenaiEmbeddings
from nlu.components.seq2seqs.marian.marian import Marian
from nlu.components.seq2seqs.med_summarizer.med_summarizer import MedSummarizer
from nlu.components.seq2seqs.med_text_generator.med_text_generator import MedTextGenerator
from nlu.components.seq2seqs.t5.t5 import T5
from nlu.components.seq2seqs.tapas_qa.tapas_qa import TapasQA
from nlu.components.spell_checkers.context_spell.context_spell_checker import ContextSpellChecker
from nlu.components.spell_checkers.norvig_spell.norvig_spell_checker import NorvigSpellChecker
from nlu.components.spell_checkers.symmetric_spell.symmetric_spell_checker import SymmetricSpellChecker
from nlu.components.stemmers.stemmer.spark_nlp_stemmer import SparkNLPStemmer
from nlu.components.stopwordscleaners.stopwordcleaner.nlustopwordcleaner import NLUStopWordcleaner
from nlu.components.tokenizers.default_tokenizer.default_tokenizer import DefaultTokenizer
from nlu.components.tokenizers.regex_tokenizer.regex_tokenizer import RegexTokenizer
from nlu.components.tokenizers.word_segmenter.word_segmenter import WordSegmenter
from nlu.components.utils.audio_assembler.audio_assembler import AudioAssembler_
from nlu.components.utils.chunk_2_doc.doc_2_chunk import Chunk_2_Doc
from nlu.components.utils.doc2chunk.doc_2_chunk import Doc_2_Chunk
from nlu.components.utils.document_assembler.spark_nlp_document_assembler import SparkNlpDocumentAssembler
from nlu.components.utils.image_assembler.spark_nlp_image_assembler import SparkNlpImageAssembler
from nlu.components.utils.multi_document_assembler.spark_nlp_multi_document_assembler import \
    SparkNlpMultiDocumentAssembler
from nlu.components.utils.ner_to_chunk_converter.ner_to_chunk_converter import NerToChunkConverter
from nlu.components.utils.ner_to_chunk_converter_licensed.ner_to_chunk_converter_licensed import \
    NerToChunkConverterLicensed
from nlu.components.utils.sdf_finisher.sdf_finisher import SdfFinisher
from nlu.components.utils.sentence_embeddings.spark_nlp_sentence_embedding import SparkNLPSentenceEmbeddings
from nlu.components.utils.table_assembler.spark_nlp_multi_document_assembler import SparkNlpTableAssembler
from nlu.ocr_components.table_extractors.doc_table_extractor.doc2table import Doc2TextTable
from nlu.ocr_components.table_extractors.pdf_table_extractor.pdf2table import PDF2TextTable
from nlu.ocr_components.table_extractors.pdf2image.pdf2image import PDF2Image
from nlu.ocr_components.table_extractors.imag2pdf.image2pdf import Image2PDF
from nlu.ocr_components.table_extractors.ppt_table_extractor.ppt2table import PPT2TextTable
from nlu.ocr_components.visual_classifiers.visual_document_classifier.visual_document_classifier import VisualDocClassifier
from nlu.ocr_components.text_recognizers.doc2text.doc2text import Doc2Text
from nlu.ocr_components.text_recognizers.img2text.img2text import Img2Text
from nlu.ocr_components.text_recognizers.pdf2text.pdf2text import Pdf2Text
from nlu.ocr_components.utils.binary2image.binary2image import Binary2Image
from nlu.ocr_components.utils.hocr_tokenizer.hocr_tokenizer import HocrTokenizer
from nlu.ocr_components.utils.image2hocr.image2hocr import Image2Hocr
from nlu.ocr_components.table_extractors.image2table.image2table import IMAGE_TABLE_DETECTOR
from nlu.ocr_components.visual_ner.visual_document_ner.visual_document_ner import VisualDocumentNer
from nlu.ocr_components.table_extractors.image2table_cell.image2table_cell import ImageTableCellDetector
from nlu.ocr_components.form_relation_extractor.form_relation_extractor import FormRelationExtractor
from nlu.ocr_components.table_extractors.image_table_cell2text.image_table_cell2text import ImageTable2Cell2TextTable
from nlu.ocr_components.utils.image_split_regions.image_split_regions import ImageSplitRegions
from nlu.ocr_components.utils.image_draw_regions.image_draw_regions import ImageDrawRegions
from nlu.ocr_components.utils.position_finder.position_finder import PositionFinder
# from nlu.ocr_components.visual_classifiers.visual_doc_classifier.visual_doc_classifier import VisualDocClassifier
from nlu.pipe.col_substitution.col_substitution_HC import *
from nlu.pipe.col_substitution.col_substitution_OCR import substitute_recognized_text_cols, \
    substitute_document_classifier_text_cols, substitute_form_extractor_text_cols
from nlu.pipe.col_substitution.col_substitution_OCR import substitute_recognized_text_cols,substitute_document_ner_cols
from nlu.pipe.col_substitution.col_substitution_OS import *
from nlu.pipe.extractors.extractor_configs_HC import *
from nlu.pipe.extractors.extractor_configs_OCR import default_text_recognizer_config, default_binary_to_image_config, \
    default_visual_classifier_config,default_form_relation_extractor_config, default_position_finder_config
from nlu.pipe.extractors.extractor_configs_OCR import default_text_recognizer_config, default_binary_to_image_config, default_visual_ner_config, default_pdf_to_image_config
from nlu.pipe.extractors.extractor_configs_OS import *
from nlu.pipe.nlu_component import NluComponent
from nlu.universe.annotator_class_universe import AnnoClassRef
from nlu.universe.atoms import JslAnnoId, LicenseType, JslAnnoPyClass
from nlu.universe.feature_node_ids import NLP_NODE_IDS, NLP_HC_NODE_IDS
from nlu.universe.feature_node_ids import OCR_NODE_IDS
from nlu.universe.feature_node_universes import NLP_FEATURE_NODES
from nlu.universe.feature_node_universes import NLP_HC_FEATURE_NODES, OCR_FEATURE_NODES
from nlu.universe.feature_universes import NLP_FEATURES
from nlu.universe.logic_universes import NLP_LEVELS, AnnoTypes
from nlu.universe.universes import ComponentBackends
from nlu.universe.universes import Licenses, ComputeContexts


def anno_class_to_empty_component(anno_class) -> NluComponent:
    """
    Build the NLU component registered for an annotator class, with no model_anno_obj
    loaded onto it.

    The class is first translated into its JSL-Anno-ID via ``anno_class_to_jsl_id``; the
    constructor stored under that id in ``ComponentUniverse.components`` is then called
    without arguments.
    :param anno_class: annotator class to find a compatible nlu-component for
    :return: NluComponent produced by the registered constructor
    :raises ValueError: if the id has no entry in ``ComponentUniverse.components`` or the
        registered constructor fails. Errors raised by ``anno_class_to_jsl_id`` itself
        propagate unchanged because that call happens outside the ``try``.
    """
    jsl_anno_id = anno_class_to_jsl_id(anno_class)
    try:
        if jsl_anno_id not in ComponentUniverse.components:
            raise ValueError(f'Invalid JSL-Anno-ID={jsl_anno_id}')
        return ComponentUniverse.components[jsl_anno_id]()
    except Exception as err:
        # Every failure, including the unknown-id ValueError raised just above, is reported
        # through this single wrapper message. `from err` records the root cause explicitly
        # as __cause__ so the traceback shows it as the direct cause.
        raise ValueError(f'Failed to create annotator for JSL-Anno-ID={jsl_anno_id}, error={err}') from err


def jsl_id_to_empty_component(jsl_id) -> NluComponent:
    """
    Resolve a JSL-ID to its annotator class and build the matching NLU component,
    with no model_anno_obj loaded onto it.
    :param jsl_id: identifier of component/pipe type
    :return: NluComponent returned by ``anno_class_to_empty_component`` for that class
    """
    # Two-step lookup: id -> annotator class -> empty component.
    anno_class = jsl_id_to_anno_class(jsl_id)
    return anno_class_to_empty_component(anno_class)


def jsl_id_to_anno_class(jsl_id) -> JslAnnoPyClass:
    """
    Look up the annotator class registered for a JSL annotator id.

    Three reference tables on ``AnnoClassRef`` are consulted in a fixed order
    (``JSL_anno2_py_class``, ``JSL_anno_HC_ref_2_py_class``,
    ``JSL_anno_OCR_ref_2_py_class``); the first table containing ``jsl_id`` wins.
    :param jsl_id: id of anno
    :return: the entry stored for ``jsl_id`` in the first matching table
    :raises ValueError: if none of the three tables contains ``jsl_id``
    """
    if jsl_id in AnnoClassRef.JSL_anno2_py_class:
        return AnnoClassRef.JSL_anno2_py_class[jsl_id]
    if jsl_id in AnnoClassRef.JSL_anno_HC_ref_2_py_class:
        return AnnoClassRef.JSL_anno_HC_ref_2_py_class[jsl_id]
    if jsl_id in AnnoClassRef.JSL_anno_OCR_ref_2_py_class:
        return AnnoClassRef.JSL_anno_OCR_ref_2_py_class[jsl_id]
    # No table knows this id.
    raise ValueError(f'Cannot find anno_class for jsl-id={jsl_id}')


def anno_class_to_jsl_id(anno_class) -> JslAnnoId:
    """
    Return the JSL-Anno-ID registered for the given annotator class.

    Only the id is returned; no license information is part of the result.
    The class-to-id mappings are obtained from three ``AnnoClassRef`` getters, tried in a
    fixed order (``get_os_pyclass_2_anno_id_dict``, ``get_hc_pyclass_2_anno_id_dict``,
    ``get_ocr_pyclass_2_anno_id_dict``); the first mapping containing ``anno_class`` wins.
    :param anno_class: class name of the annotator
    :return: JslAnnoId of anno class
    :raises ValueError: if ``anno_class`` is found in none of the three mappings
    """
    # Each getter is called at most once and only when the previous mapping missed,
    # so the same mapping is never fetched twice for a single lookup.
    os_ids = AnnoClassRef.get_os_pyclass_2_anno_id_dict()
    if anno_class in os_ids:
        return os_ids[anno_class]

    hc_ids = AnnoClassRef.get_hc_pyclass_2_anno_id_dict()
    if anno_class in hc_ids:
        return hc_ids[anno_class]

    ocr_ids = AnnoClassRef.get_ocr_pyclass_2_anno_id_dict()
    if anno_class in ocr_ids:
        return ocr_ids[anno_class]

    raise ValueError(f'Cannot get class metadata for invalid anno_class={anno_class}')


def get_anno_class_metadata(anno_class) -> Tuple[JslAnnoId, LicenseType]:
    """
    Look up the JSL-Anno-ID and default license type for the given annotator class.

    Three class-to-id tables on ``AnnoClassRef`` are checked in a fixed order
    (``JSL_OS_py_class_2_anno_id``, ``JSL_HC_py_class_2_anno_id``,
    ``JSL_OCR_py_class_2_anno_id``); the first one containing ``anno_class`` supplies the id.
    Note that an anno which maps to a component with default OS_license,
    may load a HC model_anno_obj and nlu component must be updated to HC license then
    :param anno_class: class name of the annotator
    :return: Tuple, first entry JslAnnoID, second entry Default LicenseType
    :raises ValueError: if ``anno_class`` is found in none of the three tables
    """
    # NOTE(review): every branch reports Licenses.open_source, including the HC and OCR
    # tables - confirm whether that is intended before relying on the license value.
    if anno_class in AnnoClassRef.JSL_OS_py_class_2_anno_id:
        return AnnoClassRef.JSL_OS_py_class_2_anno_id[anno_class], Licenses.open_source
    if anno_class in AnnoClassRef.JSL_HC_py_class_2_anno_id:
        return AnnoClassRef.JSL_HC_py_class_2_anno_id[anno_class], Licenses.open_source
    if anno_class in AnnoClassRef.JSL_OCR_py_class_2_anno_id:
        return AnnoClassRef.JSL_OCR_py_class_2_anno_id[anno_class], Licenses.open_source
    raise ValueError(f'Cannot get class metadata for invalid anno_class={anno_class}')


class ComponentUniverse:
    # Encapsulates all Open Source component constructors by mapping each individual annotator class to a specific constructor
    A = NLP_NODE_IDS
    H_A = NLP_HC_NODE_IDS
    O_A = OCR_NODE_IDS
    T = AnnoTypes
    F = NLP_FEATURES
    L = NLP_LEVELS
    ACR = AnnoClassRef
    # os_components = {}
    # hc_components = {}
    # ocr_components = {}
    components = {
        #### Partially Implemented

        A.PARTIALLY_IMPLEMENTED: partial(NluComponent,
                                         name=A.PARTIALLY_IMPLEMENTED,
                                         jsl_anno_class_id=A.PARTIALLY_IMPLEMENTED,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIALLY_IMPLEMENTED],
                                         node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                         type=T.PARTIALLY_READY,
                                         pdf_extractor_methods={'default': default_partial_implement_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=partially_implemented_substitutor,
                                         output_level=L.DOCUMENT,
                                         description='Not fully integrated',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         ),
        A.PARTIAL_Router: partial(NluComponent,
                                  name=A.PARTIAL_Router,
                                  jsl_anno_class_id=A.PARTIAL_Router,
                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_Router],
                                  node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                  type=T.PARTIALLY_READY,
                                  pdf_extractor_methods={'default': default_full_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=partially_implemented_substitutor,
                                  output_level=L.CHUNK,
                                  description='Not fully integrated',
                                  provider=ComponentBackends.open_source,
                                  license=Licenses.open_source,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  ),

        A.PARTIAL_ChunkMergeApproach: partial(NluComponent,
                                              name=A.PARTIAL_ChunkMergeApproach,
                                              jsl_anno_class_id=A.PARTIAL_ChunkMergeApproach,
                                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkMergeApproach],
                                              node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                              type=T.PARTIALLY_READY,
                                              pdf_extractor_methods={'default': default_partial_implement_config,
                                                                     'default_full': default_full_config, },
                                              pdf_col_name_substitutor=partially_implemented_substitutor,
                                              output_level=L.CHUNK,
                                              description='Not fully integrated',
                                              provider=ComponentBackends.open_source,
                                              license=Licenses.open_source,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              ),

        A.PARTIAL_AssertionFilterer: partial(NluComponent,
                                             name=A.PARTIAL_ChunkMergeApproach,
                                             jsl_anno_class_id=A.PARTIAL_AssertionFilterer,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_AssertionFilterer],
                                             node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                             type=T.PARTIALLY_READY,
                                             pdf_extractor_methods={'default': default_partial_implement_config,
                                                                    'default_full': default_full_config, },
                                             pdf_col_name_substitutor=partially_implemented_substitutor,
                                             output_level=L.CHUNK,
                                             description='Not fully integrated',
                                             provider=ComponentBackends.open_source,
                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             ),

        A.PARTIAL_ChunkConverter: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_ChunkConverter,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkConverter],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_ChunkKeyPhraseExtraction: partial(NluComponent,
                                                    name=A.PARTIAL_ChunkMergeApproach,
                                                    jsl_anno_class_id=A.PARTIAL_ChunkKeyPhraseExtraction,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.PARTIAL_ChunkKeyPhraseExtraction],
                                                    node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                    type=T.PARTIALLY_READY,
                                                    pdf_extractor_methods={'default': default_partial_implement_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=partially_implemented_substitutor,
                                                    output_level=L.DOCUMENT,
                                                    description='Not fully integrated',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    ),

        A.PARTIAL_ChunkSentenceSplitter: partial(NluComponent,
                                                 name=A.PARTIAL_ChunkMergeApproach,
                                                 jsl_anno_class_id=A.PARTIAL_ChunkSentenceSplitter,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.PARTIAL_ChunkSentenceSplitter],
                                                 node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                 type=T.PARTIALLY_READY,
                                                 pdf_extractor_methods={'default': default_partial_implement_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=partially_implemented_substitutor,
                                                 output_level=L.DOCUMENT,
                                                 description='Not fully integrated',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 ),

        A.PARTIAL_ChunkFiltererApproach: partial(NluComponent,
                                                 name=A.PARTIAL_ChunkMergeApproach,
                                                 jsl_anno_class_id=A.PARTIAL_ChunkFiltererApproach,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.PARTIAL_ChunkFiltererApproach],
                                                 node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                 type=T.PARTIALLY_READY,
                                                 pdf_extractor_methods={'default': default_partial_implement_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=partially_implemented_substitutor,
                                                 output_level=L.DOCUMENT,
                                                 description='Not fully integrated',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 ),

        A.PARTIAL_ChunkFilterer: partial(NluComponent,
                                         name=A.PARTIAL_ChunkMergeApproach,
                                         jsl_anno_class_id=A.PARTIAL_ChunkFilterer,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkFilterer],
                                         node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                         type=T.PARTIALLY_READY,
                                         pdf_extractor_methods={'default': default_partial_implement_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=partially_implemented_substitutor,
                                         output_level=L.DOCUMENT,
                                         description='Not fully integrated',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         ),

        A.PARTIAL_ChunkMapperApproach: partial(NluComponent,
                                               name=A.PARTIAL_ChunkMergeApproach,
                                               jsl_anno_class_id=A.PARTIAL_ChunkMapperApproach,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkMapperApproach],
                                               node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                               type=T.PARTIALLY_READY,
                                               pdf_extractor_methods={'default': default_partial_implement_config,
                                                                      'default_full': default_full_config, },
                                               pdf_col_name_substitutor=partially_implemented_substitutor,
                                               output_level=L.DOCUMENT,
                                               description='Not fully integrated',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),

        A.PARTIAL_ChunkMapperFilterer: partial(NluComponent,
                                               name=A.PARTIAL_ChunkMergeApproach,
                                               jsl_anno_class_id=A.PARTIAL_ChunkMapperFilterer,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkMapperFilterer],
                                               node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                               type=T.PARTIALLY_READY,
                                               pdf_extractor_methods={'default': default_partial_implement_config,
                                                                      'default_full': default_full_config, },
                                               pdf_col_name_substitutor=partially_implemented_substitutor,
                                               output_level=L.DOCUMENT,
                                               description='Not fully integrated',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),

        A.PARTIAL_DocumentLogRegClassifierApproach: partial(NluComponent,
                                                            name=A.PARTIAL_ChunkMergeApproach,
                                                            jsl_anno_class_id=A.PARTIAL_DocumentLogRegClassifierApproach,
                                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                                A.PARTIAL_DocumentLogRegClassifierApproach],
                                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                            type=T.PARTIALLY_READY,
                                                            pdf_extractor_methods={
                                                                'default': default_partial_implement_config,
                                                                'default_full': default_full_config, },
                                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                                            output_level=L.DOCUMENT,
                                                            description='Not fully integrated',
                                                            provider=ComponentBackends.open_source,
                                                            license=Licenses.open_source,
                                                            computation_context=ComputeContexts.spark,
                                                            output_context=ComputeContexts.spark,
                                                            ),

        A.PARTIAL_DocumentLogRegClassifierModel: partial(NluComponent,
                                                         name=A.PARTIAL_ChunkMergeApproach,
                                                         jsl_anno_class_id=A.PARTIAL_DocumentLogRegClassifierModel,
                                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                             A.PARTIAL_DocumentLogRegClassifierModel],
                                                         node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                         type=T.PARTIALLY_READY,
                                                         pdf_extractor_methods={
                                                             'default': default_partial_implement_config,
                                                             'default_full': default_full_config, },
                                                         pdf_col_name_substitutor=partially_implemented_substitutor,
                                                         output_level=L.DOCUMENT,
                                                         description='Not fully integrated',
                                                         provider=ComponentBackends.open_source,
                                                         license=Licenses.open_source,
                                                         computation_context=ComputeContexts.spark,
                                                         output_context=ComputeContexts.spark,
                                                         ),

        A.PARTIAL_ContextualParserApproach: partial(NluComponent,
                                                    name=A.PARTIAL_ChunkMergeApproach,
                                                    jsl_anno_class_id=A.PARTIAL_ContextualParserApproach,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.PARTIAL_ContextualParserApproach],
                                                    node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                    type=T.PARTIALLY_READY,
                                                    pdf_extractor_methods={'default': default_partial_implement_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=partially_implemented_substitutor,
                                                    output_level=L.DOCUMENT,
                                                    description='Not fully integrated',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    ),

        A.PARTIAL_ReIdentification: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_ReIdentification,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ReIdentification],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_NerDisambiguator: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_NerDisambiguator,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_NerDisambiguator],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_NerDisambiguatorModel: partial(NluComponent,
                                                 name=A.PARTIAL_ChunkMergeApproach,
                                                 jsl_anno_class_id=A.PARTIAL_NerDisambiguatorModel,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.PARTIAL_NerDisambiguatorModel],
                                                 node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                 type=T.PARTIALLY_READY,
                                                 pdf_extractor_methods={'default': default_partial_implement_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=partially_implemented_substitutor,
                                                 output_level=L.DOCUMENT,
                                                 description='Not fully integrated',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 ),

        A.PARTIAL_AverageEmbeddings: partial(NluComponent,
                                             name=A.PARTIAL_ChunkMergeApproach,
                                             jsl_anno_class_id=A.PARTIAL_AverageEmbeddings,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_AverageEmbeddings],
                                             node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                             type=T.PARTIALLY_READY,
                                             pdf_extractor_methods={'default': default_partial_implement_config,
                                                                    'default_full': default_full_config, },
                                             pdf_col_name_substitutor=partially_implemented_substitutor,
                                             output_level=L.DOCUMENT,
                                             description='Not fully integrated',
                                             provider=ComponentBackends.open_source,
                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             ),

        A.PARTIAL_EntityChunkEmbeddings: partial(NluComponent,
                                                 name=A.PARTIAL_ChunkMergeApproach,
                                                 jsl_anno_class_id=A.PARTIAL_EntityChunkEmbeddings,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.PARTIAL_EntityChunkEmbeddings],
                                                 node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                 type=T.PARTIALLY_READY,
                                                 pdf_extractor_methods={'default': default_partial_implement_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=partially_implemented_substitutor,
                                                 output_level=L.DOCUMENT,
                                                 description='Not fully integrated',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 ),

        A.PARTIAL_IOBTagger: partial(NluComponent,
                                     name=A.PARTIAL_ChunkMergeApproach,
                                     jsl_anno_class_id=A.PARTIAL_IOBTagger,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_IOBTagger],
                                     node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                     type=T.PARTIALLY_READY,
                                     pdf_extractor_methods={'default': default_partial_implement_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=partially_implemented_substitutor,
                                     output_level=L.DOCUMENT,
                                     description='Not fully integrated',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     ),

        A.PARTIAL_NerChunker: partial(NluComponent,
                                      name=A.PARTIAL_ChunkMergeApproach,
                                      jsl_anno_class_id=A.PARTIAL_NerChunker,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_NerChunker],
                                      node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                      type=T.PARTIALLY_READY,
                                      pdf_extractor_methods={'default': default_partial_implement_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=partially_implemented_substitutor,
                                      output_level=L.DOCUMENT,
                                      description='Not fully integrated',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      ),

        A.PARTIAL_DateNormalizer: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_DateNormalizer,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_DateNormalizer],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_RENerChunksFilter: partial(NluComponent,
                                             name=A.PARTIAL_ChunkMergeApproach,
                                             jsl_anno_class_id=A.PARTIAL_RENerChunksFilter,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_RENerChunksFilter],
                                             node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                             type=T.PARTIALLY_READY,
                                             pdf_extractor_methods={'default': default_partial_implement_config,
                                                                    'default_full': default_full_config, },
                                             pdf_col_name_substitutor=partially_implemented_substitutor,
                                             output_level=L.DOCUMENT,
                                             description='Not fully integrated',
                                             provider=ComponentBackends.open_source,
                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             ),

        A.PARTIAL_ResolverMerger: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_ResolverMerger,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ResolverMerger],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_AnnotationMerger: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_AnnotationMerger,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_AnnotationMerger],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_Word2VecApproach: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_Word2VecApproach,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_Word2VecApproach],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_WordEmbeddings: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_WordEmbeddings,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_WordEmbeddings],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_EntityRulerApproach: partial(NluComponent,
                                               name=A.PARTIAL_ChunkMergeApproach,
                                               jsl_anno_class_id=A.PARTIAL_EntityRulerApproach,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_EntityRulerApproach],
                                               node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                               type=T.PARTIALLY_READY,
                                               pdf_extractor_methods={'default': default_partial_implement_config,
                                                                      'default_full': default_full_config, },
                                               pdf_col_name_substitutor=partially_implemented_substitutor,
                                               output_level=L.DOCUMENT,
                                               description='Not fully integrated',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),

        A.PARTIAL_EntityRulerModel: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_EntityRulerModel,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_EntityRulerModel],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_TextMatcherModel: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_TextMatcherModel,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_TextMatcherModel],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_TextMatcherInternalModel: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_TextMatcherInternalModel,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_TextMatcherInternalModel],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_BigTextMatcher: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_BigTextMatcher,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_BigTextMatcher],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_BigTextMatcherModel: partial(NluComponent,
                                               name=A.PARTIAL_ChunkMergeApproach,
                                               jsl_anno_class_id=A.PARTIAL_BigTextMatcherModel,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_BigTextMatcherModel],
                                               node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                               type=T.PARTIALLY_READY,
                                               pdf_extractor_methods={'default': default_partial_implement_config,
                                                                      'default_full': default_full_config, },
                                               pdf_col_name_substitutor=partially_implemented_substitutor,
                                               output_level=L.DOCUMENT,
                                               description='Not fully integrated',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),

        A.PARTIAL_DateMatcher: partial(NluComponent,
                                       name=A.PARTIAL_ChunkMergeApproach,
                                       jsl_anno_class_id=A.PARTIAL_DateMatcher,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_DateMatcher],
                                       node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                       type=T.PARTIALLY_READY,
                                       pdf_extractor_methods={'default': default_partial_implement_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=partially_implemented_substitutor,
                                       output_level=L.DOCUMENT,
                                       description='Not fully integrated',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       ),

        A.PARTIAL_MultiDateMatcher: partial(NluComponent,
                                            name=A.PARTIAL_ChunkMergeApproach,
                                            jsl_anno_class_id=A.PARTIAL_MultiDateMatcher,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_MultiDateMatcher],
                                            node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                            type=T.PARTIALLY_READY,
                                            pdf_extractor_methods={'default': default_partial_implement_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=partially_implemented_substitutor,
                                            output_level=L.DOCUMENT,
                                            description='Not fully integrated',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            ),

        A.PARTIAL_RegexMatcher: partial(NluComponent,
                                        name=A.PARTIAL_ChunkMergeApproach,
                                        jsl_anno_class_id=A.PARTIAL_RegexMatcher,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_RegexMatcher],
                                        node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                        type=T.PARTIALLY_READY,
                                        pdf_extractor_methods={'default': default_partial_implement_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=partially_implemented_substitutor,
                                        output_level=L.DOCUMENT,
                                        description='Not fully integrated',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        ),

        A.PARTIAL_TextMatcher: partial(NluComponent,
                                       name=A.PARTIAL_ChunkMergeApproach,
                                       jsl_anno_class_id=A.PARTIAL_TextMatcher,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_TextMatcher],
                                       node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                       type=T.PARTIALLY_READY,
                                       pdf_extractor_methods={'default': default_partial_implement_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=partially_implemented_substitutor,
                                       output_level=L.DOCUMENT,
                                       description='Not fully integrated',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       ),

        A.PARTIAL_NerApproach: partial(NluComponent,
                                       name=A.PARTIAL_ChunkMergeApproach,
                                       jsl_anno_class_id=A.PARTIAL_NerApproach,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_NerApproach],
                                       node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                       type=T.PARTIALLY_READY,
                                       pdf_extractor_methods={'default': default_partial_implement_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=partially_implemented_substitutor,
                                       output_level=L.DOCUMENT,
                                       description='Not fully integrated',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       ),

        A.PARTIAL_NerCrfApproach: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_NerCrfApproach,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_NerCrfApproach],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_NerOverwriter: partial(NluComponent,
                                         name=A.PARTIAL_ChunkMergeApproach,
                                         jsl_anno_class_id=A.PARTIAL_NerOverwriter,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_NerOverwriter],
                                         node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                         type=T.PARTIALLY_READY,
                                         pdf_extractor_methods={'default': default_partial_implement_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=partially_implemented_substitutor,
                                         output_level=L.DOCUMENT,
                                         description='Not fully integrated',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         ),

        A.PARTIAL_DependencyParserApproach: partial(NluComponent,
                                                    name=A.PARTIAL_ChunkMergeApproach,
                                                    jsl_anno_class_id=A.PARTIAL_DependencyParserApproach,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.PARTIAL_DependencyParserApproach],
                                                    node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                    type=T.PARTIALLY_READY,
                                                    pdf_extractor_methods={'default': default_partial_implement_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=partially_implemented_substitutor,
                                                    output_level=L.DOCUMENT,
                                                    description='Not fully integrated',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    ),

        A.PARTIAL_TypedDependencyParserApproach: partial(NluComponent,
                                                         name=A.PARTIAL_ChunkMergeApproach,
                                                         jsl_anno_class_id=A.PARTIAL_TypedDependencyParserApproach,
                                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                             A.PARTIAL_TypedDependencyParserApproach],
                                                         node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                         type=T.PARTIALLY_READY,
                                                         pdf_extractor_methods={
                                                             'default': default_partial_implement_config,
                                                             'default_full': default_full_config, },
                                                         pdf_col_name_substitutor=partially_implemented_substitutor,
                                                         output_level=L.DOCUMENT,
                                                         description='Not fully integrated',
                                                         provider=ComponentBackends.open_source,
                                                         license=Licenses.open_source,
                                                         computation_context=ComputeContexts.spark,
                                                         output_context=ComputeContexts.spark,
                                                         ),

        A.PARTIAL_SentenceDetectorDLApproach: partial(NluComponent,
                                                      name=A.PARTIAL_ChunkMergeApproach,
                                                      jsl_anno_class_id=A.PARTIAL_SentenceDetectorDLApproach,
                                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                          A.PARTIAL_SentenceDetectorDLApproach],
                                                      node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                      type=T.PARTIALLY_READY,
                                                      pdf_extractor_methods={
                                                          'default': default_partial_implement_config,
                                                          'default_full': default_full_config, },
                                                      pdf_col_name_substitutor=partially_implemented_substitutor,
                                                      output_level=L.DOCUMENT,
                                                      description='Not fully integrated',
                                                      provider=ComponentBackends.open_source,
                                                      license=Licenses.open_source,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      ),

        A.PARTIAL_SentimentDetector: partial(NluComponent,
                                             name=A.PARTIAL_ChunkMergeApproach,
                                             jsl_anno_class_id=A.PARTIAL_SentimentDetector,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_SentimentDetector],
                                             node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                             type=T.PARTIALLY_READY,
                                             pdf_extractor_methods={'default': default_partial_implement_config,
                                                                    'default_full': default_full_config, },
                                             pdf_col_name_substitutor=partially_implemented_substitutor,
                                             output_level=L.DOCUMENT,
                                             description='Not fully integrated',
                                             provider=ComponentBackends.open_source,
                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             ),

        A.PARTIAL_ViveknSentimentApproach: partial(NluComponent,
                                                   name=A.PARTIAL_ChunkMergeApproach,
                                                   jsl_anno_class_id=A.PARTIAL_ViveknSentimentApproach,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.PARTIAL_ViveknSentimentApproach],
                                                   node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                   type=T.PARTIALLY_READY,
                                                   pdf_extractor_methods={'default': default_partial_implement_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=partially_implemented_substitutor,
                                                   output_level=L.DOCUMENT,
                                                   description='Not fully integrated',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   ),

        A.PARTIAL_SymmetricDeleteApproach: partial(NluComponent,
                                                   name=A.PARTIAL_ChunkMergeApproach,
                                                   jsl_anno_class_id=A.PARTIAL_SymmetricDeleteApproach,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.PARTIAL_SymmetricDeleteApproach],
                                                   node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                   type=T.PARTIALLY_READY,
                                                   pdf_extractor_methods={'default': default_partial_implement_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=partially_implemented_substitutor,
                                                   output_level=L.DOCUMENT,
                                                   description='Not fully integrated',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   ),

        A.PARTIAL_ChunkTokenizer: partial(NluComponent,
                                          name=A.PARTIAL_ChunkMergeApproach,
                                          jsl_anno_class_id=A.PARTIAL_ChunkTokenizer,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkTokenizer],
                                          node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                          type=T.PARTIALLY_READY,
                                          pdf_extractor_methods={'default': default_partial_implement_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=partially_implemented_substitutor,
                                          output_level=L.DOCUMENT,
                                          description='Not fully integrated',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          ),

        A.PARTIAL_ChunkTokenizerModel: partial(NluComponent,
                                               name=A.PARTIAL_ChunkMergeApproach,
                                               jsl_anno_class_id=A.PARTIAL_ChunkTokenizerModel,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_ChunkTokenizerModel],
                                               node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                               type=T.PARTIALLY_READY,
                                               pdf_extractor_methods={'default': default_partial_implement_config,
                                                                      'default_full': default_full_config, },
                                               pdf_col_name_substitutor=partially_implemented_substitutor,
                                               output_level=L.DOCUMENT,
                                               description='Not fully integrated',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),

        A.PARTIAL_RecursiveTokenizer: partial(NluComponent,
                                              name=A.PARTIAL_ChunkMergeApproach,
                                              jsl_anno_class_id=A.PARTIAL_RecursiveTokenizer,
                                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_RecursiveTokenizer],
                                              node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                              type=T.PARTIALLY_READY,
                                              pdf_extractor_methods={'default': default_partial_implement_config,
                                                                     'default_full': default_full_config, },
                                              pdf_col_name_substitutor=partially_implemented_substitutor,
                                              output_level=L.DOCUMENT,
                                              description='Not fully integrated',
                                              provider=ComponentBackends.open_source,
                                              license=Licenses.open_source,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              ),

        A.PARTIAL_RecursiveTokenizerModel: partial(NluComponent,
                                                   name=A.PARTIAL_ChunkMergeApproach,
                                                   jsl_anno_class_id=A.PARTIAL_RecursiveTokenizerModel,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.PARTIAL_RecursiveTokenizerModel],
                                                   node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                   type=T.PARTIALLY_READY,
                                                   pdf_extractor_methods={'default': default_partial_implement_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=partially_implemented_substitutor,
                                                   output_level=L.DOCUMENT,
                                                   description='Not fully integrated',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   ),

        A.PARTIAL_Token2Chunk: partial(NluComponent,
                                       name=A.PARTIAL_ChunkMergeApproach,
                                       jsl_anno_class_id=A.PARTIAL_Token2Chunk,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_Token2Chunk],
                                       node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                       type=T.PARTIALLY_READY,
                                       pdf_extractor_methods={'default': default_partial_implement_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=partially_implemented_substitutor,
                                       output_level=L.DOCUMENT,
                                       description='Not fully integrated',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       ),

        A.PARTIAL_WordSegmenterApproach: partial(NluComponent,
                                                 name=A.PARTIAL_ChunkMergeApproach,
                                                 jsl_anno_class_id=A.PARTIAL_WordSegmenterApproach,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.PARTIAL_WordSegmenterApproach],
                                                 node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                                 type=T.PARTIALLY_READY,
                                                 pdf_extractor_methods={'default': default_partial_implement_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=partially_implemented_substitutor,
                                                 output_level=L.DOCUMENT,
                                                 description='Not fully integrated',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 ),

        A.PARTIAL_GraphExtraction: partial(NluComponent,
                                           name=A.PARTIAL_ChunkMergeApproach,
                                           jsl_anno_class_id=A.PARTIAL_GraphExtraction,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_GraphExtraction],
                                           node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                           type=T.PARTIALLY_READY,
                                           pdf_extractor_methods={'default': default_partial_implement_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=partially_implemented_substitutor,
                                           output_level=L.DOCUMENT,
                                           description='Not fully integrated',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           ),

        A.PARTIAL_Lemmatizer: partial(NluComponent,
                                      name=A.PARTIAL_ChunkMergeApproach,
                                      jsl_anno_class_id=A.PARTIAL_Lemmatizer,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_Lemmatizer],
                                      node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                      type=T.PARTIALLY_READY,
                                      pdf_extractor_methods={'default': default_partial_implement_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=partially_implemented_substitutor,
                                      output_level=L.DOCUMENT,
                                      description='Not fully integrated',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      ),

        A.PARTIAL_Normalizer: partial(NluComponent,
                                      name=A.PARTIAL_ChunkMergeApproach,
                                      jsl_anno_class_id=A.PARTIAL_Normalizer,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_Normalizer],
                                      node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                      type=T.PARTIALLY_READY,
                                      pdf_extractor_methods={'default': default_partial_implement_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=partially_implemented_substitutor,
                                      output_level=L.DOCUMENT,
                                      description='Not fully integrated',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      ),

        #### Open Source
        A.CHUNK2DOC: partial(NluComponent,
                             name=A.CHUNK2DOC,
                             type=T.HELPER_ANNO,
                             get_default_model=Chunk_2_Doc.get_default_model,
                             pdf_extractor_methods={'default_full': default_full_config, },
                             # 'default': '',   TODO no extractor
                             pdf_col_name_substitutor=substitute_doc2chunk_cols,
                             output_level=L.DOCUMENT,
                             node=NLP_FEATURE_NODES.nodes[A.CHUNK2DOC],
                             description='TODO',
                             provider=ComponentBackends.open_source,
                             license=Licenses.open_source,
                             computation_context=ComputeContexts.spark,
                             output_context=ComputeContexts.spark,
                             jsl_anno_class_id=A.CHUNK2DOC,
                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CHUNK2DOC],

                             ),
        A.CHUNK_EMBEDDINGS_CONVERTER: partial(NluComponent,
                                              name=A.CHUNK_EMBEDDINGS_CONVERTER,
                                              type=T.HELPER_ANNO,
                                              get_default_model=ChunkEmbedder.get_default_model,
                                              pdf_extractor_methods={'default': default_chunk_embedding_config,
                                                                     'default_full': default_full_config, },
                                              # TODO no extractor
                                              pdf_col_name_substitutor=substitute_chunk_embed_cols,
                                              output_level=L.CHUNK,
                                              node=NLP_FEATURE_NODES.nodes[A.CHUNK_EMBEDDINGS_CONVERTER],
                                              description='Convert Chunks to Doc type col',
                                              provider=ComponentBackends.open_source,
                                              license=Licenses.open_source,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              jsl_anno_class_id=A.CHUNK_EMBEDDINGS_CONVERTER,
                                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CHUNK_EMBEDDINGS_CONVERTER],
                                              is_storage_ref_producer=True,
                                              has_storage_ref=True,
                                              ),

        A.BERT_SENTENCE_CHUNK_EMBEDDINGS: partial(NluComponent,
                                                  name=A.BERT_SENTENCE_CHUNK_EMBEDDINGS,
                                                  type=T.CHUNK_EMBEDDING,
                                                  get_default_model=BertSentenceChunkEmbeds.get_default_model,
                                                  get_pretrained_model=BertSentenceChunkEmbeds.get_pretrained_model,
                                                  pdf_extractor_methods={'default': default_chunk_embedding_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_chunk_embed_cols,
                                                  output_level=L.TOKEN,
                                                  node=NLP_FEATURE_NODES.nodes[A.BERT_SENTENCE_CHUNK_EMBEDDINGS],
                                                  description='Converts NER chunks into Chunk Embeddings generated from sentence embedder',
                                                  provider=ComponentBackends.open_source,
                                                  license=Licenses.open_source,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=A.BERT_SENTENCE_CHUNK_EMBEDDINGS,
                                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                      A.BERT_SENTENCE_CHUNK_EMBEDDINGS],
                                                  is_storage_ref_producer=True,
                                                  has_storage_ref=True,
                                                  ),

        # TODO just placeholder
        A.TRAINABLE_TOKENIZER: partial(NluComponent,
                                       name=A.POS,
                                       type=T.TOKEN_CLASSIFIER,
                                       get_default_model=RegexTokenizer.get_default_model,
                                       pdf_extractor_methods={'default': default_tokenizer_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_tokenizer_cols,
                                       output_level=L.TOKEN,
                                       node=NLP_FEATURE_NODES.nodes[A.POS],
                                       description='todo',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=A.REGEX_TOKENIZER,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.REGEX_TOKENIZER],
                                       ),

        A.CHUNKER: partial(NluComponent,
                           name=A.CHUNKER,
                           type=T.CHUNK_CLASSIFIER,
                           get_default_model=DefaultChunker.get_default_model,
                           pdf_extractor_methods={'default': default_chunk_config,
                                                  'default_full': default_full_config, },
                           pdf_col_name_substitutor=substitute_chunk_cols,
                           output_level=L.CHUNK,
                           node=NLP_FEATURE_NODES.nodes[A.CHUNKER],
                           description='Regex matcher that matches patters defined by part-of-speech (POS) tags',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.CHUNKER,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CHUNKER],

                           ),
        A.CLASSIFIER_DL: partial(NluComponent,
                                 name=A.CLASSIFIER_DL,
                                 type=T.DOCUMENT_CLASSIFIER,
                                 get_default_model=ClassifierDl.get_default_model,
                                 get_pretrained_model=ClassifierDl.get_pretrained_model,
                                 get_trainable_model=ClassifierDl.get_trainable_model,
                                 pdf_extractor_methods={'default': default_classifier_dl_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_classifier_dl_cols,
                                 output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                 node=NLP_FEATURE_NODES.nodes[A.CLASSIFIER_DL],
                                 description='Deep Learning based general classifier for many problems',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.CLASSIFIER_DL,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CLASSIFIER_DL],
                                 has_storage_ref=True,
                                 is_storage_ref_consumer=True,
                                 trainable_mirror_anno=A.TRAINABLE_CLASSIFIER_DL,
                                 ),
        A.TRAINABLE_CLASSIFIER_DL: partial(NluComponent,
                                           name=A.TRAINABLE_CLASSIFIER_DL,
                                           type=T.DOCUMENT_CLASSIFIER,
                                           get_default_model=ClassifierDl.get_default_model,
                                           get_pretrained_model=ClassifierDl.get_pretrained_model,
                                           get_trainable_model=ClassifierDl.get_trainable_model,
                                           pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_classifier_dl_cols,
                                           output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                           node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_CLASSIFIER_DL],
                                           description='Deep Learning based general classifier for many problems',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.TRAINABLE_CLASSIFIER_DL,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_CLASSIFIER_DL],
                                           has_storage_ref=True,
                                           is_storage_ref_consumer=True,
                                           trainable=True,
                                           trained_mirror_anno=A.CLASSIFIER_DL,
                                           ),
        A.CONTEXT_SPELL_CHECKER: partial(NluComponent,
                                         name=A.CONTEXT_SPELL_CHECKER,
                                         type=T.SPELL_CHECKER,
                                         get_default_model=ContextSpellChecker.get_default_model,
                                         get_pretrained_model=ContextSpellChecker.get_pretrained_model,
                                         get_trainable_model=ContextSpellChecker.get_default_trainable_model,
                                         pdf_extractor_methods={'default': default_spell_context_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=substitute_spell_context_cols,
                                         output_level=L.TOKEN,
                                         node=NLP_FEATURE_NODES.nodes[A.CONTEXT_SPELL_CHECKER],
                                         description='Deep Learning based spell checker that uses context to predict correct corrections.',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=A.CONTEXT_SPELL_CHECKER,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CONTEXT_SPELL_CHECKER],
                                         trainable_mirror_anno=A.TRAINABLE_CONTEXT_SPELL_CHECKER,
                                         ),
        A.UNTYPED_DEPENDENCY_PARSER: partial(NluComponent,
                                             name=A.UNTYPED_DEPENDENCY_PARSER,
                                             type=T.TOKEN_CLASSIFIER,
                                             get_default_model=LabeledDependencyParser.get_default_model,
                                             get_pretrained_model=LabeledDependencyParser.get_pretrained_model,
                                             get_trainable_model=LabeledDependencyParser.get_default_trainable_model,
                                             pdf_extractor_methods={'default': default_dep_typed_config,
                                                                    'default_full': default_full_config, },
                                             pdf_col_name_substitutor=substitute_labled_dependency_cols,
                                             output_level=L.TOKEN,
                                             node=NLP_FEATURE_NODES.nodes[A.UNTYPED_DEPENDENCY_PARSER],
                                             description='todo',
                                             provider=ComponentBackends.open_source,
                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             jsl_anno_class_id=A.UNTYPED_DEPENDENCY_PARSER,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.UNTYPED_DEPENDENCY_PARSER],
                                             trainable_mirror_anno=A.TRAINABLE_DEP_PARSE_UN_TYPED,
                                             ),
        A.TYPED_DEPENDENCY_PARSER: partial(NluComponent,
                                           name=A.TYPED_DEPENDENCY_PARSER,
                                           type=T.TOKEN_CLASSIFIER,
                                           get_default_model=UnlabeledDependencyParser.get_default_model,
                                           get_pretrained_model=UnlabeledDependencyParser.get_pretrained_model,
                                           get_trainable_model=UnlabeledDependencyParser.get_default_trainable_model,
                                           pdf_extractor_methods={'default': default_dep_untyped_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_un_labled_dependency_cols,
                                           output_level=L.TOKEN,
                                           node=NLP_FEATURE_NODES.nodes[A.TYPED_DEPENDENCY_PARSER],
                                           description='todo',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.TYPED_DEPENDENCY_PARSER,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TYPED_DEPENDENCY_PARSER],
                                           trainable_mirror_anno=A.TRAINABLE_DEP_PARSE_TYPED,
                                           ),
        A.DOC2CHUNK: partial(NluComponent,
                             name=A.DOC2CHUNK,
                             type=T.HELPER_ANNO,
                             get_default_model=Doc_2_Chunk.get_default_model,
                             pdf_extractor_methods={'default': default_doc2chunk_config,
                                                    'default_full': default_full_config, },
                             pdf_col_name_substitutor=substitute_doc2chunk_cols,
                             output_level=L.CHUNK,
                             node=NLP_FEATURE_NODES.nodes[A.DOC2CHUNK],
                             description='Converts Document type col to Chunk type col',
                             provider=ComponentBackends.open_source,
                             license=Licenses.open_source,
                             computation_context=ComputeContexts.spark,
                             output_context=ComputeContexts.spark,
                             jsl_anno_class_id=A.DOC2CHUNK,
                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOC2CHUNK],
                             ),
        A.DOCUMENT_ASSEMBLER: partial(NluComponent,
                                      name=A.DOCUMENT_ASSEMBLER,
                                      type=T.HELPER_ANNO,
                                      get_default_model=SparkNlpDocumentAssembler.get_default_model,
                                      pdf_extractor_methods={'default': default_document_config,
                                                             'default_full': default_full_config},
                                      pdf_col_name_substitutor=substitute_doc_assembler_cols,
                                      output_level=L.DOCUMENT,
                                      node=NLP_FEATURE_NODES.nodes[A.DOCUMENT_ASSEMBLER],
                                      description='todo',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.DOCUMENT_ASSEMBLER,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOCUMENT_ASSEMBLER],
                                      ),
        A.AUDIO_ASSEMBLER: partial(NluComponent,
                                   name=A.AUDIO_ASSEMBLER,
                                   type=T.HELPER_ANNO,
                                   get_default_model=AudioAssembler_.get_default_model,
                                   pdf_extractor_methods={'default': default_only_result_popped_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=audio_assembler_cols,
                                   output_level=L.AUDIO_SERIES,
                                   node=NLP_FEATURE_NODES.nodes[A.AUDIO_ASSEMBLER],
                                   description='todo',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.AUDIO_ASSEMBLER,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.AUDIO_ASSEMBLER],
                                   ),
        A.WAV2VEC_FOR_CTC: partial(NluComponent,
                                   name=A.WAV2VEC_FOR_CTC,
                                   type=T.SPEECH_RECOGNIZER,
                                   get_default_model=Wav2Vec.get_default_model,
                                   get_pretrained_model=Wav2Vec.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_only_result_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_wav2vec_cols,
                                   output_level=L.DOCUMENT,
                                   node=NLP_FEATURE_NODES.nodes[A.WAV2VEC_FOR_CTC],
                                   description='todo',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.WAV2VEC_FOR_CTC,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WAV2VEC_FOR_CTC],
                                   # Bas on Librosa which uses http://www.mega-nerd.com/libsndfile/
                                   applicable_file_types=['wav', 'mp3', 'flac', 'aiff', 'aifc', 'ogg', 'aflac', 'alac',
                                                          'dsd', 'pcm', ]
                                   ),

        A.HUBERT_FOR_CTC: partial(NluComponent,
                                  name=A.HUBERT_FOR_CTC,
                                  type=T.SPEECH_RECOGNIZER,
                                  get_default_model=Hubert.get_default_model,
                                  get_pretrained_model=Hubert.get_pretrained_model,
                                  pdf_extractor_methods={'default': default_only_result_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_wav2vec_cols,
                                  output_level=L.DOCUMENT,
                                  node=NLP_FEATURE_NODES.nodes[A.HUBERT_FOR_CTC],
                                  description='todo',
                                  provider=ComponentBackends.open_source,
                                  license=Licenses.open_source,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=A.HUBERT_FOR_CTC,
                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[A.HUBERT_FOR_CTC],
                                  # Bas on Librosa which uses http://www.mega-nerd.com/libsndfile/
                                  applicable_file_types=['wav', 'mp3', 'flac', 'aiff', 'aifc', 'ogg', 'aflac', 'alac',
                                                         'dsd', 'pcm', ]
                                  ),
        A.WHISPER_FOR_CTC: partial(NluComponent,
                                   name=A.WHISPER_FOR_CTC,
                                   type=T.SPEECH_RECOGNIZER,
                                   get_default_model=Whisper.get_default_model,
                                   get_pretrained_model=Whisper.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_only_result_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_wav2vec_cols,
                                   output_level=L.DOCUMENT,
                                   node=NLP_FEATURE_NODES.nodes[A.WHISPER_FOR_CTC],
                                   description='Whisper is an automatic speech recognition (ASR) system trained on 680,000 hours of multilingual and multitask supervised data collected from the web. It transcribe in multiple languages, as well as translate from those languages into English.',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.WHISPER_FOR_CTC,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WHISPER_FOR_CTC],
                                   # Bas on Librosa which uses http://www.mega-nerd.com/libsndfile/
                                   applicable_file_types=['wav', 'mp3', 'flac', 'aiff', 'aifc', 'ogg', 'aflac', 'alac',
                                                          'dsd', 'pcm', ]
                                   ),

        A.TAPAS_FOR_QA: partial(NluComponent,
                                name=A.TAPAS_FOR_QA,
                                type=T.QUESTION_TABLE_ANSWERER,
                                get_default_model=TapasQA.get_default_model,
                                get_pretrained_model=TapasQA.get_pretrained_model,
                                pdf_extractor_methods={
                                    'default': default_tapas_config,
                                    'default_full': default_full_config, },
                                pdf_col_name_substitutor=substitute_tapas_qa_cols,
                                output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                node=NLP_FEATURE_NODES.nodes[A.TAPAS_FOR_QA],
                                description='todo',
                                provider=ComponentBackends.open_source,
                                license=Licenses.open_source,
                                computation_context=ComputeContexts.spark,
                                output_context=ComputeContexts.spark,
                                jsl_anno_class_id=A.TAPAS_FOR_QA,
                                jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TAPAS_FOR_QA],
                                ),

        A.TABLE_ASSEMBLER: partial(NluComponent,
                                   name=A.TABLE_ASSEMBLER,
                                   type=T.HELPER_ANNO,
                                   get_default_model=SparkNlpTableAssembler.get_default_model,
                                   pdf_extractor_methods={'default': default_only_result_config,  # TODO
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_wav2vec_cols,  # TODO
                                   output_level=L.DOCUMENT,
                                   node=NLP_FEATURE_NODES.nodes[A.TABLE_ASSEMBLER],
                                   description='todo',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.TABLE_ASSEMBLER,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TABLE_ASSEMBLER],
                                   applicable_file_types=['csv', 'json']  # or str/pd format
                                   ),

        A.DOCUMENT_NORMALIZER: partial(NluComponent,
                                       name=A.DOCUMENT_NORMALIZER,
                                       type=T.TEXT_NORMALIZER,
                                       get_default_model=SparkNLPDocumentNormalizer.get_default_model,
                                       pdf_extractor_methods={'default': default_norm_document_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_doc_norm_cols,
                                       output_level=L.DOCUMENT,
                                       node=NLP_FEATURE_NODES.nodes[A.DOCUMENT_NORMALIZER],
                                       description='todo',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=A.DOCUMENT_NORMALIZER,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOCUMENT_NORMALIZER],
                                       ),

        A.FINISHER: partial(NluComponent,
                            name=A.FINISHER,
                            type=T.HELPER_ANNO,
                            get_default_model=SdfFinisher.get_default_model,
                            pdf_extractor_methods={'default': default_full_config,
                                                   'default_full': default_full_config, },
                            pdf_col_name_substitutor=substitute_finisher_cols,
                            output_level=L.DOCUMENT,
                            node=NLP_FEATURE_NODES.nodes[A.FINISHER],
                            description='Finisher transformer to output the results of a pipeline.',
                            provider=ComponentBackends.open_source,
                            license=Licenses.open_source,
                            computation_context=ComputeContexts.spark,
                            output_context=ComputeContexts.spark,
                            jsl_anno_class_id=A.FINISHER,
                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.FINISHER],
                            ),
        A.LANGUAGE_DETECTOR_DL: partial(NluComponent,
                                        name=A.LANGUAGE_DETECTOR_DL,
                                        type=T.DOCUMENT_CLASSIFIER,
                                        get_default_model=LanguageDetector.get_default_model,
                                        get_pretrained_model=LanguageDetector.get_pretrained_model,
                                        pdf_extractor_methods={'default': default_lang_classifier_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=None,  # TODO no sub defined
                                        output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                        # TODO sub-token actually(?)
                                        node=NLP_FEATURE_NODES.nodes[A.LANGUAGE_DETECTOR_DL],
                                        description='Get lemmatized base version of tokens',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.LANGUAGE_DETECTOR_DL,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.LANGUAGE_DETECTOR_DL],
                                        ),
        A.LEMMATIZER: partial(NluComponent,
                              name=A.LEMMATIZER,
                              type=T.TOKEN_NORMALIZER,
                              output_context=ComputeContexts.spark,
                              get_default_model=SparkNLPLemmatizer.get_default_model,
                              get_pretrained_model=SparkNLPLemmatizer.get_pretrained_model,
                              get_trainable_model=SparkNLPLemmatizer.get_default_trainable_model,
                              pdf_extractor_methods={'default': default_lemma_config,
                                                     'default_full': default_full_config, },
                              pdf_col_name_substitutor=substitute_lem_cols,
                              output_level=L.TOKEN,  # TODO sub-token actually(?)
                              node=NLP_FEATURE_NODES.nodes[A.LEMMATIZER],
                              description='Get lemmatized base version of tokens',
                              provider=ComponentBackends.open_source,
                              license=Licenses.open_source,
                              computation_context=ComputeContexts.spark,
                              jsl_anno_class_id=A.LEMMATIZER,
                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.LEMMATIZER],
                              trainable_mirror_anno=A.TRAINABLE_LEMMATIZER
                              ),
        A.MULTI_CLASSIFIER_DL: partial(NluComponent,
                                       name=A.MULTI_CLASSIFIER_DL,
                                       type=T.DOCUMENT_CLASSIFIER,
                                       output_level=L.MULTI_TOKEN_CLASSIFIER,
                                       get_default_model=MultiClassifier.get_default_model,
                                       get_pretrained_model=MultiClassifier.get_pretrained_model,
                                       get_trainable_model=MultiClassifier.get_default_trainable_model,
                                       pdf_extractor_methods={'default': default_multi_classifier_dl_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_multi_classifier_dl_cols,
                                       node=NLP_FEATURE_NODES.nodes[A.MULTI_CLASSIFIER_DL],
                                       description='Deep Learning based general classifier for multi-label classification problem. I.e. problems, where one document may be labled with multiple labels at the same time.',
                                       provider=ComponentBackends.open_source,
                                       license=Licenses.open_source,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=A.MULTI_CLASSIFIER_DL,
                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.MULTI_CLASSIFIER_DL],
                                       has_storage_ref=True,
                                       is_storage_ref_consumer=True,
                                       trainable_mirror_anno=A.TRAINABLE_MULTI_CLASSIFIER_DL,
                                       ),

        A.TRAINABLE_MULTI_CLASSIFIER_DL: partial(NluComponent,
                                                 name=A.TRAINABLE_MULTI_CLASSIFIER_DL,
                                                 type=T.DOCUMENT_CLASSIFIER,
                                                 output_level=L.MULTI_TOKEN_CLASSIFIER,
                                                 get_default_model=MultiClassifier.get_default_model,
                                                 get_pretrained_model=MultiClassifier.get_pretrained_model,
                                                 get_trainable_model=MultiClassifier.get_default_trainable_model,
                                                 pdf_extractor_methods={'default': default_multi_classifier_dl_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_multi_classifier_dl_cols,
                                                 node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_MULTI_CLASSIFIER_DL],
                                                 description='Trainable Deep Learning based general classifier for multi-label classification problem. I.e. problems, where one document may be labled with multiple labels at the same time.',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.TRAINABLE_MULTI_CLASSIFIER_DL,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.TRAINABLE_MULTI_CLASSIFIER_DL],
                                                 has_storage_ref=True,
                                                 is_storage_ref_consumer=True,
                                                 trainable=True,
                                                 trained_mirror_anno=A.CLASSIFIER_DL,
                                                 # Should be A.MULTI_CLASSIFIER_DL, but fitted class is actually classifier DL, special edge case
                                                 ),

        A.N_GRAMM_GENERATOR: partial(NluComponent,
                                     name=A.N_GRAMM_GENERATOR,
                                     type=T.CHUNK_CLASSIFIER,  # Classify each n-gram wether they match Pattern or not
                                     get_default_model=NGram.get_default_model,
                                     pdf_extractor_methods={'default': default_ngram_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_ngram_cols,
                                     output_level=L.CHUNK,
                                     node=NLP_FEATURE_NODES.nodes[A.N_GRAMM_GENERATOR],
                                     description='Extract N-Gram chunks from texts',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.N_GRAMM_GENERATOR,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.N_GRAMM_GENERATOR],
                                     ),
        A.NER_CONVERTER: partial(NluComponent,
                                 name=A.NER_CONVERTER,
                                 type=T.HELPER_ANNO,
                                 get_default_model=NerToChunkConverter.get_default_model,
                                 pdf_extractor_methods={'default': default_ner_converter_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_ner_converter_cols,
                                 output_level=L.CHUNK,
                                 node=NLP_FEATURE_NODES.nodes[A.NER_CONVERTER],
                                 description='Convert NER-IOB tokens into concatenated strings (aka chunks)',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.NER_CONVERTER,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NER_CONVERTER],
                                 ),
        A.NER_CRF: partial(NluComponent,
                           name=A.NER_CRF,
                           type=T.TOKEN_CLASSIFIER,
                           output_level=L.TOKEN,
                           get_default_model=NERDLCRF.get_default_model,
                           get_pretrained_model=NERDLCRF.get_pretrained_model,
                           get_trainable_model=NERDLCRF.get_default_trainable_model,
                           pdf_extractor_methods={'default': '', 'default_full': default_full_config, },
                           pdf_col_name_substitutor=None,  # TODO
                           node=NLP_FEATURE_NODES.nodes[A.NER_CRF],
                           description='Classical NER model_anno_obj based on conditional random fields (CRF). Predicts IOB tags ',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.NER_CRF,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NER_CRF],
                           trainable_mirror_anno=A.TRAINABLE_NER_CRF,
                           ),
        A.NER_DL: partial(NluComponent,
                          name=A.NER_DL,
                          type=T.TOKEN_CLASSIFIER,
                          output_level=L.TOKEN,
                          get_default_model=NERDL.get_default_model,
                          get_pretrained_model=NERDL.get_pretrained_model,
                          get_trainable_model=NERDL.get_default_trainable_model,
                          pdf_extractor_methods={'default': default_NER_config, 'meta': meta_NER_config,
                                                 'default_full': default_full_config, },
                          pdf_col_name_substitutor=substitute_ner_dl_cols,
                          node=NLP_FEATURE_NODES.nodes[A.NER_DL],
                          description='Deep Learning based NER model_anno_obj that predicts IOB tags. ',
                          provider=ComponentBackends.open_source,
                          license=Licenses.open_source,
                          computation_context=ComputeContexts.spark,
                          output_context=ComputeContexts.spark,
                          jsl_anno_class_id=A.NER_DL,
                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NER_DL],
                          trainable_mirror_anno=A.TRAINABLE_NER_DL,
                          has_storage_ref=True,
                          is_storage_ref_consumer=True
                          ),
        A.TRAINABLE_NER_DL: partial(NluComponent,
                                    name=A.TRAINABLE_NER_DL,
                                    type=T.TOKEN_CLASSIFIER,
                                    get_default_model=NERDL.get_default_model,
                                    get_pretrained_model=NERDL.get_pretrained_model,
                                    get_trainable_model=NERDL.get_default_trainable_model,
                                    pdf_extractor_methods={'default': default_NER_config, 'meta': meta_NER_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_ner_dl_cols,
                                    output_level=L.TOKEN,
                                    node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_NER_DL],
                                    description='Deep Learning based NER model_anno_obj that predicts IOB tags. ',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.TRAINABLE_NER_DL,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_NER_DL],
                                    trained_mirror_anno=A.NER_DL,
                                    trainable=True,
                                    has_storage_ref=True,
                                    is_storage_ref_consumer=True
                                    ),

        A.NORMALIZER: partial(NluComponent,
                              name=A.NORMALIZER,
                              type=T.TOKEN_NORMALIZER,
                              get_default_model=SparkNLPNormalizer.get_default_model,
                              get_pretrained_model=SparkNLPNormalizer.get_pretrained_model,
                              # get_trainable_model=SparkNLPLemmatizer.get_default_trainable_model,
                              pdf_extractor_methods={'default': default_norm_config,
                                                     'default_full': default_full_config, },
                              pdf_col_name_substitutor=substitute_norm_cols,
                              output_level=L.TOKEN,  # TODO sub-token actually(?)
                              node=NLP_FEATURE_NODES.nodes[A.NORMALIZER],
                              description='Get lemmatized base version of tokens',
                              provider=ComponentBackends.open_source,
                              license=Licenses.open_source,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=A.NORMALIZER,
                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NORMALIZER],
                              trainable_mirror_anno=A.TRAINABLE_NORMALIZER
                              ),
        A.NORVIG_SPELL_CHECKER: partial(NluComponent,
                                        name=A.NORVIG_SPELL_CHECKER,
                                        type=T.SPELL_CHECKER,
                                        get_default_model=NorvigSpellChecker.get_default_model,
                                        get_pretrained_model=NorvigSpellChecker.get_pretrained_model,
                                        get_trainable_model=NorvigSpellChecker.get_default_trainable_model,
                                        pdf_extractor_methods={'default': default_spell_norvig_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_spell_norvig_cols,
                                        output_level=L.TOKEN,  # TODO sub-token actually
                                        node=NLP_FEATURE_NODES.nodes[A.NORVIG_SPELL_CHECKER],
                                        description='Norvig algorithm based Spell Checker',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.NORVIG_SPELL_CHECKER,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.NORVIG_SPELL_CHECKER],
                                        trainable_mirror_anno=A.TRAINABLE_NORVIG_SPELL_CHECKER
                                        ),
        A.POS: partial(NluComponent,
                       name=A.POS,
                       type=T.TOKEN_CLASSIFIER,
                       get_default_model=PartOfSpeechJsl.get_default_model,
                       get_pretrained_model=PartOfSpeechJsl.get_pretrained_model,
                       get_trainable_model=PartOfSpeechJsl.get_default_trainable_model,
                       pdf_extractor_methods={'default': default_POS_config, 'default_full': default_full_config, },
                       pdf_col_name_substitutor=substitute_pos_cols,
                       output_level=L.TOKEN,
                       node=NLP_FEATURE_NODES.nodes[A.POS],
                       description='todo',
                       provider=ComponentBackends.open_source,
                       license=Licenses.open_source,
                       computation_context=ComputeContexts.spark,
                       output_context=ComputeContexts.spark,
                       jsl_anno_class_id=A.POS,
                       jsl_anno_py_class=ACR.JSL_anno2_py_class[A.POS],
                       trainable_mirror_anno=A.TRAINABLE_POS,
                       ),
        A.TRAINABLE_POS: partial(NluComponent,
                                 name=A.TRAINABLE_POS,
                                 type=T.TOKEN_CLASSIFIER,
                                 get_default_model=PartOfSpeechJsl.get_default_model,
                                 get_pretrained_model=PartOfSpeechJsl.get_pretrained_model,
                                 get_trainable_model=PartOfSpeechJsl.get_default_trainable_model,
                                 pdf_extractor_methods={'default': default_POS_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_pos_cols,
                                 output_level=L.TOKEN,
                                 node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_POS],
                                 description='todo',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.TRAINABLE_POS,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_POS],
                                 trained_mirror_anno=A.POS,
                                 trainable=True
                                 ),

        A.REGEX_MATCHER: partial(NluComponent,  # TODO , type as ner_converted ok ?
                                 name=A.REGEX_MATCHER,
                                 type=T.HELPER_ANNO,
                                 get_default_model=RegexMatcher.get_default_model,
                                 # TODO extractor??
                                 pdf_extractor_methods={'default': default_ner_converter_config,
                                                        'default_full': default_full_config, },
                                 # TODO substitor??
                                 pdf_col_name_substitutor=substitute_ner_converter_cols,
                                 output_level=L.CHUNK,
                                 node=NLP_FEATURE_NODES.nodes[A.REGEX_MATCHER],
                                 description='Matches chunks in text based on regex rules',
                                 provider=ComponentBackends.open_source,
                                 license=Licenses.open_source,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=A.REGEX_MATCHER,
                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[A.REGEX_MATCHER],
                                 ),

        A.REGEX_TOKENIZER: partial(NluComponent,
                                   name=A.POS,
                                   type=T.TOKEN_CLASSIFIER,
                                   get_default_model=RegexTokenizer.get_default_model,
                                   pdf_extractor_methods={'default': default_tokenizer_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_tokenizer_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.POS],
                                   description='todo',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.REGEX_TOKENIZER,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.REGEX_TOKENIZER],
                                   ),
        A.SENTENCE_DETECTOR: partial(NluComponent,
                                     name=A.SENTENCE_DETECTOR,
                                     type=T.SENTENCE_DETECTOR,
                                     get_default_model=PragmaticSentenceDetector.get_default_model,
                                     pdf_extractor_methods={'default': default_sentence_detector_DL_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_sentence_detector_dl_cols,
                                     output_level=L.SENTENCE,
                                     node=NLP_FEATURE_NODES.nodes[A.SENTENCE_DETECTOR],
                                     description='Classical rule based Sentence Detector',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.SENTENCE_DETECTOR,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTENCE_DETECTOR],
                                     ),
        A.SENTENCE_DETECTOR_DL: partial(NluComponent,
                                        name=A.SENTENCE_DETECTOR_DL,
                                        type=T.SENTENCE_DETECTOR,
                                        get_default_model=SentenceDetectorDeep.get_default_model,
                                        get_pretrained_model=SentenceDetectorDeep.get_pretrained_model,
                                        # get_trainable_model=SentenceDetectorDeep.get_trainable_model,
                                        pdf_extractor_methods={'default': default_sentence_detector_DL_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_sentence_detector_dl_cols,
                                        output_level=L.SENTENCE,
                                        node=NLP_FEATURE_NODES.nodes[A.SENTENCE_DETECTOR_DL],
                                        description='Deep Learning based sentence Detector',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.SENTENCE_DETECTOR_DL,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTENCE_DETECTOR_DL],
                                        trainable_mirror_anno=A.TRAINABLE_SENTENCE_DETECTOR_DL
                                        ),
        A.SENTENCE_EMBEDDINGS_CONVERTER: partial(NluComponent,
                                                 name=A.SENTENCE_EMBEDDINGS_CONVERTER,
                                                 type=T.DOCUMENT_EMBEDDING,
                                                 get_default_model=SparkNLPSentenceEmbeddings.get_default_model,
                                                 pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_sent_embed_cols,
                                                 output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                                 node=NLP_FEATURE_NODES.nodes[A.SENTENCE_EMBEDDINGS_CONVERTER],
                                                 description='Converts Word Embeddings to Sentence/Document Embeddings',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.SENTENCE_EMBEDDINGS_CONVERTER,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.SENTENCE_EMBEDDINGS_CONVERTER],
                                                 is_storage_ref_producer=True,
                                                 has_storage_ref=True
                                                 ),
        A.STEMMER: partial(NluComponent,
                           name=A.STEMMER,
                           type=T.TOKEN_NORMALIZER,
                           get_default_model=SparkNLPStemmer.get_default_model,
                           pdf_extractor_methods={'default': default_stemm_config,
                                                  'default_full': default_full_config, },
                           pdf_col_name_substitutor=substitute_stem_cols,
                           output_level=L.TOKEN,  # TODO sub-token actually(?)
                           node=NLP_FEATURE_NODES.nodes[A.STEMMER],
                           description='Get stemmed base version of tokens',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.STEMMER,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.STEMMER],
                           ),
        A.STOP_WORDS_CLEANER: partial(NluComponent,
                                      name=A.STOP_WORDS_CLEANER,
                                      type=T.TEXT_NORMALIZER,
                                      get_default_model=NLUStopWordcleaner.get_default_model,
                                      get_pretrained_model=NLUStopWordcleaner.get_pretrained_model,
                                      pdf_extractor_methods={'default': default_stopwords_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_stopwords_cols,
                                      output_level=L.TOKEN,  # TODO sub-token actually
                                      node=NLP_FEATURE_NODES.nodes[A.STOP_WORDS_CLEANER],
                                      description='Removes stopwords from text based on internal list of stop words.',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.STOP_WORDS_CLEANER,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.STOP_WORDS_CLEANER],
                                      ),
        A.SYMMETRIC_DELETE_SPELLCHECKER: partial(NluComponent,
                                                 name=A.SYMMETRIC_DELETE_SPELLCHECKER,
                                                 type=T.SPELL_CHECKER,
                                                 get_default_model=SymmetricSpellChecker.get_default_model,
                                                 get_pretrained_model=SymmetricSpellChecker.get_pretrained_model,
                                                 get_trainable_model=SymmetricSpellChecker.get_default_trainable_model,
                                                 pdf_extractor_methods={'default': default_spell_symmetric_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_spell_symm_cols,
                                                 output_level=L.TOKEN,  # TODO sub-token actually
                                                 node=NLP_FEATURE_NODES.nodes[A.SYMMETRIC_DELETE_SPELLCHECKER],
                                                 description='Symmetric Spell Checker',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.SYMMETRIC_DELETE_SPELLCHECKER,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.SYMMETRIC_DELETE_SPELLCHECKER],
                                                 trainable_mirror_anno=A.TRAINABLE_SYMMETRIC_DELETE_SPELLCHECKER
                                                 ),
        A.TOKENIZER: partial(NluComponent,
                             name=A.TOKENIZER,
                             type=T.TOKENIZER,
                             get_default_model=DefaultTokenizer.get_default_model,
                             pdf_extractor_methods={'default': default_tokenizer_config,
                                                    'default_full': default_full_config, },
                             pdf_col_name_substitutor=substitute_tokenizer_cols,
                             output_level=L.TOKEN,
                             node=NLP_FEATURE_NODES.nodes[A.TOKENIZER],
                             description='Default tokenizer',
                             provider=ComponentBackends.open_source,
                             license=Licenses.open_source,
                             computation_context=ComputeContexts.spark,
                             output_context=ComputeContexts.spark,
                             jsl_anno_class_id=A.TOKENIZER,
                             jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TOKENIZER],
                             ),
        A.SENTIMENT_DL: partial(NluComponent,
                                name=A.SENTIMENT_DL,
                                type=T.DOCUMENT_CLASSIFIER,
                                get_default_model=SentimentDl.get_default_model,
                                get_pretrained_model=SentimentDl.get_pretrained_model,
                                get_trainable_model=SentimentDl.get_default_trainable_model,
                                pdf_extractor_methods={'default': default_sentiment_dl_config,
                                                       'default_full': default_full_config, },
                                pdf_col_name_substitutor=substitute_sentiment_dl_cols,
                                output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                node=NLP_FEATURE_NODES.nodes[A.SENTIMENT_DL],
                                description='Deep Learning based Sentiment Detector',
                                provider=ComponentBackends.open_source,
                                license=Licenses.open_source,
                                computation_context=ComputeContexts.spark,
                                output_context=ComputeContexts.spark,
                                jsl_anno_class_id=A.SENTIMENT_DL,
                                jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTIMENT_DL],
                                trainable_mirror_anno=A.TRAINABLE_SENTIMENT_DL,
                                is_storage_ref_consumer=True,
                                has_storage_ref=True
                                ),
        A.TRAINABLE_SENTIMENT_DL: partial(NluComponent,
                                          name=A.TRAINABLE_SENTIMENT_DL,
                                          type=T.DOCUMENT_CLASSIFIER,
                                          get_default_model=SentimentDl.get_default_model,
                                          get_pretrained_model=SentimentDl.get_pretrained_model,
                                          get_trainable_model=SentimentDl.get_default_trainable_model,
                                          pdf_extractor_methods={'default': default_sentiment_dl_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_sentiment_dl_cols,
                                          output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                          node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_SENTIMENT_DL],
                                          description='Deep Learning based Sentiment Detector',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.TRAINABLE_SENTIMENT_DL,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_SENTIMENT_DL],
                                          trained_mirror_anno=A.SENTIMENT_DL,
                                          is_storage_ref_consumer=True,
                                          has_storage_ref=True,
                                          trainable=True
                                          ),
        A.SENTIMENT_DETECTOR: partial(NluComponent,
                                      name=A.SENTIMENT_DETECTOR,
                                      type=T.DOCUMENT_CLASSIFIER,
                                      get_default_model=Sentiment.get_default_model,
                                      # get_pretrained_model = Sentiment.get_pretrained_model,
                                      get_trainable_model=Sentiment.get_default_trainable_model,
                                      pdf_extractor_methods={'default': default_sentiment_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_sentiment_dl_cols,
                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                      node=NLP_FEATURE_NODES.nodes[A.SENTIMENT_DETECTOR],
                                      description='Rule based sentiment detector, which calculates a score based on predefined keywords',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.SENTIMENT_DETECTOR,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.SENTIMENT_DETECTOR],
                                      trainable_mirror_anno=A.TRAINABLE_SENTIMENT,
                                      ),
        A.VIVEKN_SENTIMENT: partial(NluComponent,
                                    name=A.VIVEKN_SENTIMENT,
                                    type=T.DOCUMENT_CLASSIFIER,
                                    get_default_model=ViveknSentiment.get_default_model,
                                    get_pretrained_model=ViveknSentiment.get_pretrained_model,
                                    get_trainable_model=ViveknSentiment.get_default_trainable_model,
                                    pdf_extractor_methods={'default': default_sentiment_vivk_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_sentiment_vivk_cols,
                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                    node=NLP_FEATURE_NODES.nodes[A.VIVEKN_SENTIMENT],
                                    description='Sentiment detector based on the vivekn algorithm',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.VIVEKN_SENTIMENT,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.VIVEKN_SENTIMENT],
                                    trainable_mirror_anno=A.TRAINABLE_VIVEKN_SENTIMENT
                                    ),
        A.WORD_EMBEDDINGS: partial(NluComponent,
                                   name=A.WORD_EMBEDDINGS,
                                   type=T.TOKEN_EMBEDDING,
                                   get_default_model=Glove.get_default_model,
                                   get_pretrained_model=Glove.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_word_embedding_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_word_embed_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.WORD_EMBEDDINGS],
                                   description='Static Word Embeddings generator, i.e. Glove, etc..',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.WORD_EMBEDDINGS,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WORD_EMBEDDINGS],
                                   is_storage_ref_producer=True,
                                   has_storage_ref=True,
                                   ),
        A.WORD_SEGMENTER: partial(NluComponent,
                                  name=A.WORD_SEGMENTER,
                                  type=T.TOKENIZER,
                                  get_default_model=WordSegmenter.get_default_model,
                                  get_pretrained_model=WordSegmenter.get_pretrained_model,
                                  get_trainable_model=WordSegmenter.get_default_model_for_lang,
                                  pdf_extractor_methods={'default': default_word_segmenter_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_word_seg_cols,
                                  output_level=L.TOKEN,
                                  node=NLP_FEATURE_NODES.nodes[A.WORD_SEGMENTER],
                                  description='Segments non white space seperated text into tokens, like Chinese or Japanese. ',
                                  provider=ComponentBackends.open_source,
                                  license=Licenses.open_source,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=A.WORD_SEGMENTER,
                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WORD_SEGMENTER],
                                  trainable_mirror_anno=A.TRAINABLE_WORD_SEGMENTER
                                  ),
        A.YAKE_KEYWORD_EXTRACTION: partial(NluComponent,
                                           name=A.YAKE_KEYWORD_EXTRACTION,
                                           type=T.CHUNK_CLASSIFIER,
                                           # TODO??? Classifies each chunks/ngram likelyhood of beeing a Ketyword
                                           get_default_model=Yake.get_default_model,
                                           pdf_extractor_methods={'default': default_yake_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_YAKE_cols,
                                           output_level=L.CHUNK,  # Actual sub-ngram/ngram filter
                                           node=NLP_FEATURE_NODES.nodes[A.YAKE_KEYWORD_EXTRACTION],
                                           description='Calculates probability of each n-gram beeing a keyword. Yields a selection of these n-grams with specific filters,i.e. length, probability, etc..',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.YAKE_KEYWORD_EXTRACTION,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.YAKE_KEYWORD_EXTRACTION],
                                           has_storage_ref=False,
                                           is_storage_ref_consumer=False,
                                           is_storage_ref_producer=False,
                                           ),

        A.DOC2VEC: partial(NluComponent,
                           name=A.DOC2VEC,
                           type=T.TOKEN_EMBEDDING,
                           get_default_model=Doc2Vec.get_default_model,
                           get_trainable_model=Doc2Vec.get_trainable_model,
                           get_pretrained_model=Doc2Vec.get_pretrained_model,
                           pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                  'default_full': default_full_config, },
                           pdf_col_name_substitutor=substitute_sent_embed_cols,
                           output_level=L.TOKEN,
                           node=NLP_FEATURE_NODES.nodes[A.DOC2VEC],
                           description='Trains a Word2Vec model_anno_obj that creates vector representations of words in a text corpus. The algorithm first constructs a vocabulary from the corpus and then learns vector representation of words in the vocabulary. The vector representation can be used as features in natural language processing and machine learning algorithms.',
                           provider=ComponentBackends.open_source,
                           license=Licenses.open_source,
                           computation_context=ComputeContexts.spark,
                           output_context=ComputeContexts.spark,
                           jsl_anno_class_id=A.DOC2VEC,
                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DOC2VEC],
                           has_storage_ref=True,
                           is_storage_ref_producer=True,
                           trainable_mirror_anno=A.TRAINABLE_DOC2VEC
                           ),

        A.TRAINABLE_DOC2VEC: partial(NluComponent,
                                     name=A.TRAINABLE_DOC2VEC,
                                     type=T.TOKEN_EMBEDDING,
                                     get_default_model=Doc2Vec.get_default_model,
                                     get_trainable_model=Doc2Vec.get_trainable_model,
                                     get_pretrained_model=Doc2Vec.get_pretrained_model,
                                     pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_sent_embed_cols,
                                     output_level=L.TOKEN,
                                     node=NLP_FEATURE_NODES.nodes[A.TRAINABLE_DOC2VEC],
                                     description='Trains a Word2Vec model_anno_obj that creates vector representations of words in a text corpus. The algorithm first constructs a vocabulary from the corpus and then learns vector representation of words in the vocabulary. The vector representation can be used as features in natural language processing and machine learning algorithms.',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.TRAINABLE_DOC2VEC,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.TRAINABLE_DOC2VEC],
                                     has_storage_ref=True,
                                     is_storage_ref_producer=True,
                                     trained_mirror_anno=A.DOC2VEC,
                                     trainable=True
                                     ),

        ### ________ TRANSFORMERS BELOW _________
        A.ALBERT_EMBEDDINGS: partial(NluComponent,
                                     name=A.ALBERT_EMBEDDINGS,
                                     type=T.TOKEN_EMBEDDING,
                                     get_default_model=SparkNLPAlbert.get_default_model,
                                     get_pretrained_model=SparkNLPAlbert.get_pretrained_model,
                                     pdf_extractor_methods={'default': default_word_embedding_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_word_embed_cols,
                                     output_level=L.TOKEN,
                                     node=NLP_FEATURE_NODES.nodes[A.ALBERT_EMBEDDINGS],
                                     description='ALBERT: A LITE BERT FOR SELF-SUPERVISED LEARNING OF LANGUAGE REPRESENTATIONS - Google Research, Toyota Technological Institute at Chicago',
                                     provider=ComponentBackends.open_source,
                                     license=Licenses.open_source,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=A.ALBERT_EMBEDDINGS,
                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ALBERT_EMBEDDINGS],
                                     has_storage_ref=True,
                                     is_storage_ref_producer=True,
                                     ),

        A.COREF_SPAN_BERT: partial(NluComponent,
                                   name=A.COREF_SPAN_BERT,
                                   type=T.TOKEN_CLASSIFIER,
                                   get_default_model=CorefBert.get_default_model,
                                   get_pretrained_model=CorefBert.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_coref_spanbert_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_coref_cols,
                                   output_level=L.CO_REFERENCE,
                                   node=NLP_FEATURE_NODES.nodes[A.COREF_SPAN_BERT],
                                   description='Spanbert for coreference ',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.COREF_SPAN_BERT,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                       A.COREF_SPAN_BERT],
                                   ),

        A.DEBERTA_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                    name=A.DEBERTA_FOR_TOKEN_CLASSIFICATION,
                                                    type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                    get_default_model=TokenDeBerta.get_default_model,
                                                    get_pretrained_model=TokenDeBerta.get_pretrained_model,
                                                    pdf_extractor_methods={'default': default_token_classifier_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                    output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                    node=NLP_FEATURE_NODES.nodes[A.DEBERTA_FOR_TOKEN_CLASSIFICATION],
                                                    description='AlbertForTokenClassification can load ALBERT Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    jsl_anno_class_id=A.DEBERTA_FOR_TOKEN_CLASSIFICATION,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.DEBERTA_FOR_TOKEN_CLASSIFICATION],
                                                    ),

        A.CAMEMBERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                      name=A.CAMEMBERT_FOR_TOKEN_CLASSIFICATION,
                                                      type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                      get_default_model=TokenCamembert.get_default_model,
                                                      get_pretrained_model=TokenCamembert.get_pretrained_model,
                                                      pdf_extractor_methods={'default': default_token_classifier_config,
                                                                             'default_full': default_full_config, },
                                                      pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                      output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                      node=NLP_FEATURE_NODES.nodes[
                                                          A.CAMEMBERT_FOR_TOKEN_CLASSIFICATION],
                                                      description='CamemBertForTokenClassification can load ALBERT Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                      provider=ComponentBackends.open_source,
                                                      license=Licenses.open_source,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      jsl_anno_class_id=A.CAMEMBERT_FOR_TOKEN_CLASSIFICATION,
                                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                          A.CAMEMBERT_FOR_TOKEN_CLASSIFICATION],
                                                      ),

        A.ALBERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                   name=A.ALBERT_FOR_TOKEN_CLASSIFICATION,
                                                   type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                   get_default_model=TokenAlbert.get_default_model,
                                                   get_pretrained_model=TokenAlbert.get_pretrained_model,
                                                   pdf_extractor_methods={'default': default_token_classifier_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                   output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                   node=NLP_FEATURE_NODES.nodes[A.ALBERT_FOR_TOKEN_CLASSIFICATION],
                                                   description='AlbertForTokenClassification can load ALBERT Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=A.ALBERT_FOR_TOKEN_CLASSIFICATION,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.ALBERT_FOR_TOKEN_CLASSIFICATION],
                                                   ),  #

        A.CAMEMBERT_EMBEDDINGS: partial(NluComponent,
                                        name=A.CAMEMBERT_EMBEDDINGS,
                                        type=T.TOKEN_EMBEDDING,
                                        get_default_model=CamemBert.get_default_model,
                                        get_pretrained_model=CamemBert.get_pretrained_model,
                                        pdf_extractor_methods={'default': default_word_embedding_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_word_embed_cols,
                                        output_level=L.TOKEN,
                                        node=NLP_FEATURE_NODES.nodes[A.CAMEMBERT_EMBEDDINGS],
                                        description='Token-level embeddings using CAMEN-BERT',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=A.CAMEMBERT_EMBEDDINGS,
                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.CAMEMBERT_EMBEDDINGS],
                                        has_storage_ref=True,
                                        is_storage_ref_producer=True,
                                        ),

        A.BERT_EMBEDDINGS: partial(NluComponent,
                                   name=A.BERT_EMBEDDINGS,
                                   type=T.TOKEN_EMBEDDING,
                                   get_default_model=SparkNLPBert.get_default_model,
                                   get_pretrained_model=SparkNLPBert.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_word_embedding_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_word_embed_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.BERT_EMBEDDINGS],
                                   description='Token-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.BERT_EMBEDDINGS,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BERT_EMBEDDINGS],
                                   has_storage_ref=True,
                                   is_storage_ref_producer=True,
                                   ),
        A.INSTRUCTOR_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                                  name=A.INSTRUCTOR_SENTENCE_EMBEDDINGS,
                                                  type=T.DOCUMENT_EMBEDDING,
                                                  get_default_model=Instructor.get_default_model,
                                                  get_pretrained_model=Instructor.get_pretrained_model,
                                                  pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_sent_embed_cols,
                                                  output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                                  node=NLP_FEATURE_NODES.nodes[A.INSTRUCTOR_SENTENCE_EMBEDDINGS],
                                                  description='Sentence-level embeddings using Instructor. Instructor, an instruction-finetuned text embedding model that can generate text embeddings tailored to any task (e.g., classification, retrieval, clustering, text evaluation, etc.) and domains (e.g., science, finance, etc.) by simply providing the task instruction, without any finetuning.',
                                                  provider=ComponentBackends.open_source,
                                                  license=Licenses.open_source,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=A.INSTRUCTOR_SENTENCE_EMBEDDINGS,
                                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                      A.INSTRUCTOR_SENTENCE_EMBEDDINGS],
                                                  has_storage_ref=True,
                                                  is_storage_ref_producer=True,
                                                  ),
        A.BERT_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                            name=A.BERT_SENTENCE_EMBEDDINGS,
                                            type=T.DOCUMENT_EMBEDDING,
                                            get_default_model=BertSentence.get_default_model,
                                            get_pretrained_model=BertSentence.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_sent_embed_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                            node=NLP_FEATURE_NODES.nodes[A.BERT_SENTENCE_EMBEDDINGS],
                                            description='Sentence-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.BERT_SENTENCE_EMBEDDINGS,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BERT_SENTENCE_EMBEDDINGS],
                                            has_storage_ref=True,
                                            is_storage_ref_producer=True,
                                            ),
        A.E5_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                            name=A.E5_SENTENCE_EMBEDDINGS,
                                            type=T.DOCUMENT_EMBEDDING,
                                            get_default_model=E5.get_default_model,
                                            get_pretrained_model=E5.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_sent_embed_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                            node=NLP_FEATURE_NODES.nodes[A.E5_SENTENCE_EMBEDDINGS],
                                            description='Sentence-level embeddings using E5. E5, a weakly supervised text embedding model that can generate text embeddings tailored to any task (e.g., classification, retrieval, clustering, text evaluation, etc.).',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.E5_SENTENCE_EMBEDDINGS,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.E5_SENTENCE_EMBEDDINGS],
                                            has_storage_ref=True,
                                            is_storage_ref_producer=True,
                                            ),
        A.BGE_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                          name=A.BGE_SENTENCE_EMBEDDINGS,
                                          type=T.DOCUMENT_EMBEDDING,
                                          get_default_model=BGE.get_default_model,
                                          get_pretrained_model=BGE.get_pretrained_model,
                                          pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_sent_embed_cols,
                                          output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                          node=NLP_FEATURE_NODES.nodes[A.BGE_SENTENCE_EMBEDDINGS],
                                          description='Sentence-level embeddings using BGE. E5, a weakly supervised text embedding model that can generate text embeddings tailored to any task (e.g., classification, retrieval, clustering, text evaluation, etc.).',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.BGE_SENTENCE_EMBEDDINGS,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BGE_SENTENCE_EMBEDDINGS],
                                          has_storage_ref=True,
                                          is_storage_ref_producer=True,
                                          ),


        A.OPENAI_EMBEDDINGS: partial(NluComponent,
                                            name=A.OPENAI_EMBEDDINGS,
                                            type=T.DOCUMENT_EMBEDDING,
                                            get_default_model=OpenaiEmbeddings.get_default_model,
                                            get_pretrained_model=OpenaiEmbeddings.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_sent_embed_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                            node=NLP_FEATURE_NODES.nodes[A.OPENAI_EMBEDDINGS],
                                            description='',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.OPENAI_EMBEDDINGS,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.OPENAI_EMBEDDINGS],
                                            has_storage_ref=True,
                                            is_storage_ref_producer=True,
                                            ),

        A.BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                 name=A.BERT_FOR_TOKEN_CLASSIFICATION,
                                                 type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                 get_default_model=TokenBert.get_default_model,
                                                 get_pretrained_model=TokenBert.get_pretrained_model,
                                                 pdf_extractor_methods={'default': default_token_classifier_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                 output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                 node=NLP_FEATURE_NODES.nodes[A.BERT_FOR_TOKEN_CLASSIFICATION],
                                                 description='BertForTokenClassification can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                 provider=ComponentBackends.open_source,
                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.BERT_FOR_TOKEN_CLASSIFICATION,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.BERT_FOR_TOKEN_CLASSIFICATION],
                                                 ),

        A.BERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                    name=A.BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                    type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                    get_default_model=SeqBertClassifier.get_default_model,
                                                    get_pretrained_model=SeqBertClassifier.get_pretrained_model,
                                                    pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                    node=NLP_FEATURE_NODES.nodes[A.BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                    description='BertForSequenceClassification can load Bert Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    jsl_anno_class_id=A.BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                    ),
        A.CAMEMBERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                         name=A.CAMEMBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                         type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                         get_default_model=SeqCamembertClassifier.get_default_model,
                                                         get_pretrained_model=SeqCamembertClassifier.get_pretrained_model,
                                                         pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                                'default_full': default_full_config, },
                                                         pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                         output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                         node=NLP_FEATURE_NODES.nodes[
                                                             A.CAMEMBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                         description='BertForSequenceClassification can load Bert Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                         provider=ComponentBackends.open_source,
                                                         license=Licenses.open_source,
                                                         computation_context=ComputeContexts.spark,
                                                         output_context=ComputeContexts.spark,
                                                         jsl_anno_class_id=A.CAMEMBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                             A.CAMEMBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                         ),

        A.DISTIL_BERT_EMBEDDINGS: partial(NluComponent,
                                          name=A.DISTIL_BERT_EMBEDDINGS,
                                          type=T.TOKEN_EMBEDDING,
                                          get_default_model=DistilBert.get_default_model,
                                          get_pretrained_model=DistilBert.get_pretrained_model,
                                          pdf_extractor_methods={'default': default_word_embedding_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_word_embed_cols,
                                          output_level=L.TOKEN,
                                          node=NLP_FEATURE_NODES.nodes[A.DISTIL_BERT_EMBEDDINGS],
                                          description='DistilBERT is a small, fast, cheap and light Transformer model_anno_obj trained by distilling BERT base. It has 40% less parameters than bert-base-uncased, runs 60% faster while preserving over 95% of BERT’s performances as measured on the GLUE language understanding benchmark.',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.DISTIL_BERT_EMBEDDINGS,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DISTIL_BERT_EMBEDDINGS],
                                          has_storage_ref=True,
                                          is_storage_ref_producer=True,
                                          ),
        A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                           name=A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                           type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                           get_default_model=SeqDilstilBertClassifier.get_default_model,
                                                           get_pretrained_model=SeqDilstilBertClassifier.get_pretrained_model,
                                                           pdf_extractor_methods={
                                                               'default': default_classifier_dl_config,
                                                               'default_full': default_full_config, },
                                                           pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                           output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                           node=NLP_FEATURE_NODES.nodes[
                                                               A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                           description='DistilBertForSequenceClassification can load DistilBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                           provider=ComponentBackends.open_source,
                                                           license=Licenses.open_source,
                                                           computation_context=ComputeContexts.spark,
                                                           output_context=ComputeContexts.spark,
                                                           jsl_anno_class_id=A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                               A.DISTIL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                           ),
        A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                        name=A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                        type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                        get_default_model=TokenDistilBert.get_default_model,
                                                        get_pretrained_model=TokenDistilBert.get_pretrained_model,
                                                        pdf_extractor_methods={
                                                            'default': default_token_classifier_config,
                                                            'default_full': default_full_config, },
                                                        pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                        output_level=L.TOKEN,
                                                        node=NLP_FEATURE_NODES.nodes[
                                                            A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION],
                                                        description='DistilBertForTokenClassification can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                        provider=ComponentBackends.open_source,
                                                        license=Licenses.open_source,
                                                        computation_context=ComputeContexts.spark,
                                                        output_context=ComputeContexts.spark,
                                                        jsl_anno_class_id=A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                            A.DISTIL_BERT_FOR_TOKEN_CLASSIFICATION],
                                                        ),
        A.ELMO_EMBEDDINGS: partial(NluComponent,
                                   name=A.ELMO_EMBEDDINGS,
                                   type=T.TOKEN_EMBEDDING,
                                   get_default_model=SparkNLPElmo.get_default_model,
                                   get_pretrained_model=SparkNLPElmo.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_word_embedding_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_word_embed_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_FEATURE_NODES.nodes[A.ELMO_EMBEDDINGS],
                                   description='Word embeddings from ELMo (Embeddings from Language Models), a language model_anno_obj trained on the 1 Billion Word Benchmark.',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.ELMO_EMBEDDINGS,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ELMO_EMBEDDINGS],
                                   has_storage_ref=True,
                                   is_storage_ref_producer=True,
                                   ),
        A.LONGFORMER_EMBEDDINGS: partial(NluComponent,
                                         name=A.LONGFORMER_EMBEDDINGS,
                                         type=T.TOKEN_EMBEDDING,
                                         get_default_model=Longformer.get_default_model,
                                         get_pretrained_model=Longformer.get_pretrained_model,
                                         pdf_extractor_methods={'default': default_word_embedding_config,
                                                                'default_full': default_full_config, },
                                         pdf_col_name_substitutor=substitute_word_embed_cols,
                                         output_level=L.TOKEN,
                                         node=NLP_FEATURE_NODES.nodes[A.LONGFORMER_EMBEDDINGS],
                                         description='Longformer is a transformer model_anno_obj for long documents. The Longformer model_anno_obj was presented in Longformer: The Long-Document Transformer by Iz Beltagy, Matthew E. Peters, Arman Cohan. longformer-base-4096 is a BERT-like model_anno_obj started from the RoBERTa checkpoint and pretrained for MLM on long documents. It supports sequences of length up to 4,096.',
                                         provider=ComponentBackends.open_source,
                                         license=Licenses.open_source,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=A.LONGFORMER_EMBEDDINGS,
                                         jsl_anno_py_class=ACR.JSL_anno2_py_class[A.LONGFORMER_EMBEDDINGS],
                                         has_storage_ref=True,
                                         is_storage_ref_producer=True,
                                         ),

        A.LONGFORMER_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                       name=A.LONGFORMER_FOR_TOKEN_CLASSIFICATION,
                                                       type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                       get_default_model=TokenLongFormer.get_default_model,
                                                       get_pretrained_model=TokenLongFormer.get_pretrained_model,
                                                       pdf_extractor_methods={
                                                           'default': default_token_classifier_config,
                                                           'default_full': default_full_config, },
                                                       pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                       output_level=L.TOKEN,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.LONGFORMER_FOR_TOKEN_CLASSIFICATION],
                                                       description='LongformerForTokenClassification can load Longformer Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                       provider=ComponentBackends.open_source,
                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.LONGFORMER_FOR_TOKEN_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.LONGFORMER_FOR_TOKEN_CLASSIFICATION],
                                                       ),
        A.MARIAN_TRANSFORMER: partial(NluComponent,
                                      name=A.MARIAN_TRANSFORMER,
                                      type=T.DOCUMENT_CLASSIFIER,
                                      get_default_model=Marian.get_default_model,
                                      get_pretrained_model=Marian.get_pretrained_model,
                                      pdf_extractor_methods={'default': default_marian_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_marian_cols,
                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                      node=NLP_FEATURE_NODES.nodes[A.MARIAN_TRANSFORMER],
                                      description='Marian is an efficient, free Neural Machine Translation framework written in pure C++ with minimal dependencies. It is mainly being developed by the Microsoft Translator team. Many academic (most notably the University of Edinburgh and in the past the Adam Mickiewicz University in Poznań) and commercial contributors help with its development. MarianTransformer uses the models trained by MarianNMT.',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.MARIAN_TRANSFORMER,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.MARIAN_TRANSFORMER],
                                      ),
        A.MPNET_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                            name=A.MPNET_SENTENCE_EMBEDDINGS,
                                            type=T.DOCUMENT_EMBEDDING,
                                            get_default_model=MPNetSentence.get_default_model,
                                            get_pretrained_model=MPNetSentence.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_sent_embed_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                            node=NLP_FEATURE_NODES.nodes[A.MPNET_SENTENCE_EMBEDDINGS],
                                            description='Sentence-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.MPNET_SENTENCE_EMBEDDINGS,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[A.MPNET_SENTENCE_EMBEDDINGS],
                                            has_storage_ref=True,
                                            is_storage_ref_producer=True,
                                            ),
        A.MPNET_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                            name=A.MPNET_FOR_SEQUENCE_CLASSIFICATION,
                                            type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                            get_default_model=SeqMPNetClassifier.get_default_model,
                                            get_pretrained_model=SeqMPNetClassifier.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                            node=NLP_FEATURE_NODES.nodes[A.MPNET_FOR_SEQUENCE_CLASSIFICATION],
                                            description='MPNetForSequenceClassification can load MPNet Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.MPNET_FOR_SEQUENCE_CLASSIFICATION,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                            A.MPNET_FOR_SEQUENCE_CLASSIFICATION],
                                                     ),
        A.ROBERTA_EMBEDDINGS: partial(NluComponent,
                                      name=A.ROBERTA_EMBEDDINGS,
                                      type=T.TOKEN_EMBEDDING,
                                      get_default_model=Roberta.get_default_model,
                                      get_pretrained_model=Roberta.get_pretrained_model,
                                      pdf_extractor_methods={'default': default_word_embedding_config,
                                                             'default_full': default_full_config, },
                                      pdf_col_name_substitutor=substitute_word_embed_cols,
                                      output_level=L.TOKEN,
                                      node=NLP_FEATURE_NODES.nodes[A.ROBERTA_EMBEDDINGS],
                                      description='The RoBERTa model_anno_obj was proposed in RoBERTa: A Robustly Optimized BERT Pretraining Approach by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov. It is based on Google’s BERT model_anno_obj released in 2018.',
                                      provider=ComponentBackends.open_source,
                                      license=Licenses.open_source,
                                      computation_context=ComputeContexts.spark,
                                      output_context=ComputeContexts.spark,
                                      jsl_anno_class_id=A.ROBERTA_EMBEDDINGS,
                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ROBERTA_EMBEDDINGS],
                                      has_storage_ref=True,
                                      is_storage_ref_producer=True,
                                      ),
        A.ROBERTA_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                    name=A.ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                    type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                    get_default_model=TokenRoBerta.get_default_model,
                                                    get_pretrained_model=TokenRoBerta.get_pretrained_model,
                                                    pdf_extractor_methods={'default': default_token_classifier_config,
                                                                           'default_full': default_full_config, },
                                                    pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                    output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                    node=NLP_FEATURE_NODES.nodes[A.ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                    description='RoBertaForTokenClassification can load RoBERTa Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    jsl_anno_class_id=A.ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                    ),
        A.ROBERTA_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                               name=A.ROBERTA_SENTENCE_EMBEDDINGS,
                                               type=T.DOCUMENT_EMBEDDING,
                                               get_default_model=RobertaSentence.get_default_model,
                                               get_pretrained_model=RobertaSentence.get_pretrained_model,
                                               pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                      'default_full': default_full_config, },
                                               pdf_col_name_substitutor=substitute_sent_embed_cols,
                                               output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                               node=NLP_FEATURE_NODES.nodes[A.ROBERTA_SENTENCE_EMBEDDINGS],
                                               description='Sentence-level embeddings using BERT. BERT (Bidirectional Encoder Representations from Transformers) provides dense vector representations for natural language by using a deep, pre-trained neural network with the Transformer architecture.',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               jsl_anno_class_id=A.ROBERTA_SENTENCE_EMBEDDINGS,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ROBERTA_SENTENCE_EMBEDDINGS],
                                               has_storage_ref=True,
                                               is_storage_ref_producer=True,
                                               ),
        A.T5_TRANSFORMER: partial(NluComponent,
                                  # TODO  task based construction, i.e. get_preconfigured_model
                                  name=A.T5_TRANSFORMER,
                                  type=T.DOCUMENT_CLASSIFIER,
                                  get_default_model=T5.get_default_model,
                                  get_pretrained_model=T5.get_pretrained_model,
                                  pdf_extractor_methods={'default': default_T5_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_T5_cols,
                                  output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                  node=NLP_FEATURE_NODES.nodes[A.T5_TRANSFORMER],
                                  description='T5 reconsiders all NLP tasks into a unified text-to-text-format where the input and output are always text strings, in contrast to BERT-style models that can only output either a class label or a span of the input. The text-to-text framework is able to use the same model_anno_obj, loss function, and hyper-parameters on any NLP task, including machine translation, document summarization, question answering, and classification tasks (e.g., sentiment analysis). T5 can even apply to regression tasks by training it to predict the string representation of a number instead of the number itself.',
                                  provider=ComponentBackends.open_source,
                                  license=Licenses.open_source,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=A.T5_TRANSFORMER,
                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[A.T5_TRANSFORMER],
                                  ),

        H_A.MEDICAL_SUMMARIZER: partial(NluComponent,
                                        name=H_A.MEDICAL_SUMMARIZER,
                                        type=T.DOCUMENT_CLASSIFIER,
                                        get_default_model=MedSummarizer.get_default_model,
                                        get_pretrained_model=MedSummarizer.get_pretrained_model,
                                        pdf_extractor_methods={'default': default_T5_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_summarizer_cols,
                                        output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                        node=NLP_HC_FEATURE_NODES.nodes[H_A.MEDICAL_SUMMARIZER],
                                        description='Medical summarizer',
                                        provider=ComponentBackends.open_source,
                                        license=Licenses.open_source,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=H_A.MEDICAL_SUMMARIZER,
                                        jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.MEDICAL_SUMMARIZER],
                                        ),

        A.BART_TRANSFORMER: partial(NluComponent,
                                    name=A.BART_TRANSFORMER,
                                    type=T.DOCUMENT_CLASSIFIER,
                                    get_default_model=SparkNLPBartTransformer.get_default_model,
                                    get_pretrained_model=SparkNLPBartTransformer.get_pretrained_model,
                                    pdf_extractor_methods={'default': default_gpt2_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_gpt2_cols,
                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                    node=NLP_FEATURE_NODES.nodes[A.BART_TRANSFORMER],
                                    description='Bart Transformer',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.BART_TRANSFORMER,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BART_TRANSFORMER],
                                    ),

        A.M2M100_TRANSFORMER: partial(NluComponent,
                                    name=A.M2M100_TRANSFORMER,
                                    type=T.DOCUMENT_CLASSIFIER,
                                    get_default_model=M2M100.get_default_model,
                                    get_pretrained_model=M2M100.get_pretrained_model,
                                    pdf_extractor_methods={'default': default_gpt2_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_gpt2_cols,
                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                    node=NLP_FEATURE_NODES.nodes[A.M2M100_TRANSFORMER],
                                    description='Bart Transformer',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.M2M100_TRANSFORMER,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.M2M100_TRANSFORMER],
                                    ),


        H_A.MEDICAL_TEXT_GENERATOR: partial(NluComponent,
                                            name=H_A.MEDICAL_TEXT_GENERATOR,
                                            type=T.DOCUMENT_CLASSIFIER,
                                            get_default_model=MedTextGenerator.get_default_model,
                                            get_pretrained_model=MedTextGenerator.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_gpt2_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_gpt2_cols,
                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.MEDICAL_TEXT_GENERATOR],
                                            description='Medical Text Generator',
                                            provider=ComponentBackends.open_source,
                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.MEDICAL_TEXT_GENERATOR,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.MEDICAL_TEXT_GENERATOR],
                                            ),

        A.UNIVERSAL_SENTENCE_ENCODER: partial(NluComponent,
                                              name=A.UNIVERSAL_SENTENCE_ENCODER,
                                              type=T.DOCUMENT_EMBEDDING,
                                              get_default_model=SparkNLPUse.get_default_model,
                                              get_pretrained_model=SparkNLPUse.get_pretrained_model,
                                              pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                     'default_full': default_full_config, },
                                              pdf_col_name_substitutor=substitute_sent_embed_cols,
                                              output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                              node=NLP_FEATURE_NODES.nodes[A.UNIVERSAL_SENTENCE_ENCODER],
                                              description='The Universal Sentence Encoder encodes text into high dimensional vectors that can be used for text classification, semantic similarity, clustering and other natural language tasks.',
                                              provider=ComponentBackends.open_source,
                                              license=Licenses.open_source,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              jsl_anno_class_id=A.UNIVERSAL_SENTENCE_ENCODER,
                                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.UNIVERSAL_SENTENCE_ENCODER],
                                              has_storage_ref=True,
                                              is_storage_ref_producer=True,
                                              ),

        A.XLM_ROBERTA_EMBEDDINGS: partial(NluComponent,
                                          name=A.XLM_ROBERTA_EMBEDDINGS,
                                          type=T.TOKEN_EMBEDDING,
                                          get_default_model=XLM.get_default_model,
                                          get_pretrained_model=XLM.get_pretrained_model,
                                          pdf_extractor_methods={'default': default_word_embedding_config,
                                                                 'default_full': default_full_config, },
                                          pdf_col_name_substitutor=substitute_word_embed_cols,
                                          output_level=L.TOKEN,
                                          node=NLP_FEATURE_NODES.nodes[A.XLM_ROBERTA_EMBEDDINGS],
                                          description='The XLM-RoBERTa model_anno_obj was proposed in Unsupervised Cross-lingual Representation Learning at Scale by Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco GuzmÃ¡n, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. It is based on Facebook’s RoBERTa model_anno_obj released in 2019. It is a large multi-lingual language model_anno_obj, trained on 2.5TB of filtered CommonCrawl data.',
                                          provider=ComponentBackends.open_source,
                                          license=Licenses.open_source,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=A.XLM_ROBERTA_EMBEDDINGS,
                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[A.XLM_ROBERTA_EMBEDDINGS],
                                          has_storage_ref=True,
                                          is_storage_ref_producer=True,
                                          ),

        A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                        name=A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                        type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                        get_default_model=TokenXlmRoBerta.get_default_model,
                                                        get_pretrained_model=TokenXlmRoBerta.get_pretrained_model,
                                                        pdf_extractor_methods={
                                                            'default': default_token_classifier_config,
                                                            'default_full': default_full_config, },
                                                        pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                        output_level=L.TOKEN,
                                                        node=NLP_FEATURE_NODES.nodes[
                                                            A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                        description='XlmRoBertaForTokenClassification can load XLM-RoBERTa Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                        provider=ComponentBackends.open_source,
                                                        license=Licenses.open_source,
                                                        computation_context=ComputeContexts.spark,
                                                        output_context=ComputeContexts.spark,
                                                        jsl_anno_class_id=A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION,
                                                        jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                            A.XLM_ROBERTA_FOR_TOKEN_CLASSIFICATION],
                                                        ),
        A.XLM_ROBERTA_SENTENCE_EMBEDDINGS: partial(NluComponent,
                                                   name=A.XLM_ROBERTA_SENTENCE_EMBEDDINGS,
                                                   type=T.DOCUMENT_EMBEDDING,
                                                   get_default_model=Sentence_XLM.get_default_model,
                                                   get_pretrained_model=Sentence_XLM.get_pretrained_model,
                                                   pdf_extractor_methods={'default': default_sentence_embedding_config,
                                                                          'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_sent_embed_cols,
                                                   output_level=L.INPUT_DEPENDENT_DOCUMENT_EMBEDDING,
                                                   node=NLP_FEATURE_NODES.nodes[A.XLM_ROBERTA_SENTENCE_EMBEDDINGS],
                                                   description='Sentence-level embeddings using XLM-RoBERTa. The XLM-RoBERTa model_anno_obj was proposed in Unsupervised Cross-lingual Representation Learning at Scale by Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco GuzmÃ¡n, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. It is based on Facebook’s RoBERTa model_anno_obj released in 2019. It is a large multi-lingual language model_anno_obj, trained on 2.5TB of filtered CommonCrawl data.',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=A.XLM_ROBERTA_SENTENCE_EMBEDDINGS,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.XLM_ROBERTA_SENTENCE_EMBEDDINGS],
                                                   has_storage_ref=True,
                                                   is_storage_ref_producer=True,
                                                   ),
        A.XLNET_EMBEDDINGS: partial(NluComponent,
                                    name=A.XLNET_EMBEDDINGS,
                                    type=T.TOKEN_EMBEDDING,
                                    get_default_model=SparkNLPXlnet.get_default_model,
                                    get_pretrained_model=SparkNLPXlnet.get_pretrained_model,
                                    pdf_extractor_methods={'default': default_word_embedding_config,
                                                           'default_full': default_full_config, },
                                    pdf_col_name_substitutor=substitute_word_embed_cols,
                                    output_level=L.TOKEN,
                                    node=NLP_FEATURE_NODES.nodes[A.XLNET_EMBEDDINGS],
                                    description='XLNet is a new unsupervised language representation learning method based on a novel generalized permutation language modeling objective. Additionally, XLNet employs Transformer-XL as the backbone model_anno_obj, exhibiting excellent performance for language tasks involving long context. Overall, XLNet achieves state-of-the-art (SOTA) results on various downstream language tasks including question answering, natural language inference, sentiment analysis, and document ranking.',
                                    provider=ComponentBackends.open_source,
                                    license=Licenses.open_source,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=A.XLNET_EMBEDDINGS,
                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[A.XLNET_EMBEDDINGS],
                                    has_storage_ref=True,
                                    is_storage_ref_producer=True,
                                    ),
        A.XLNET_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                  name=A.XLNET_FOR_TOKEN_CLASSIFICATION,
                                                  type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                  get_default_model=TokenXlnet.get_default_model,
                                                  get_pretrained_model=TokenXlnet.get_pretrained_model,
                                                  pdf_extractor_methods={'default': default_token_classifier_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                  output_level=L.TOKEN,
                                                  node=NLP_FEATURE_NODES.nodes[A.XLNET_FOR_TOKEN_CLASSIFICATION],
                                                  description='XlnetForTokenClassification can load XLNet Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                  provider=ComponentBackends.open_source,
                                                  license=Licenses.open_source,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=A.XLNET_FOR_TOKEN_CLASSIFICATION,
                                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                      A.XLNET_FOR_TOKEN_CLASSIFICATION],
                                                  ),

        A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                           name=A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                           type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                           get_default_model=SeqXlmRobertaClassifier.get_default_model,
                                                           get_pretrained_model=SeqXlmRobertaClassifier.get_pretrained_model,
                                                           pdf_extractor_methods={
                                                               'default': default_classifier_dl_config,
                                                               'default_full': default_full_config, },
                                                           pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                           output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                           node=NLP_FEATURE_NODES.nodes[
                                                               A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                           description='XlmRoBertaForSequenceClassification can load XLM-RoBERTa Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification task',
                                                           provider=ComponentBackends.open_source,
                                                           license=Licenses.open_source,
                                                           computation_context=ComputeContexts.spark,
                                                           output_context=ComputeContexts.spark,
                                                           jsl_anno_class_id=A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                               A.XLM_ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                           ),

        A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                       name=A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                       get_default_model=SeqRobertaClassifier.get_default_model,
                                                       get_pretrained_model=SeqRobertaClassifier.get_pretrained_model,
                                                       pdf_extractor_methods={'default': default_classifier_dl_config,
                                                                              'default_full': default_full_config, },
                                                       pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                       output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       description='RoBertaForSequenceClassification can load RoBERTa Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks',
                                                       provider=ComponentBackends.open_source,
                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.ROBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       ),

        A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                          name=A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION,
                                                          type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                          get_default_model=SeqLongformerClassifier.get_default_model,
                                                          get_pretrained_model=SeqLongformerClassifier.get_pretrained_model,
                                                          pdf_extractor_methods={
                                                              'default': default_seq_classifier_config,
                                                              'default_full': default_full_config, },
                                                          pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                          output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                          node=NLP_FEATURE_NODES.nodes[
                                                              A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION],
                                                          description='RoBertaForSequenceClassification can load RoBERTa Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks',
                                                          provider=ComponentBackends.open_source,
                                                          license=Licenses.open_source,
                                                          computation_context=ComputeContexts.spark,
                                                          output_context=ComputeContexts.spark,
                                                          jsl_anno_class_id=A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION,
                                                          jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                              A.LONGFORMER_FOR_SEQUENCE_CLASSIFICATION],
                                                          ),
        A.ALBERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                      name=A.ALBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                      type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                      get_default_model=SeqAlbertClassifier.get_default_model,
                                                      get_pretrained_model=SeqAlbertClassifier.get_pretrained_model,
                                                      pdf_extractor_methods={'default': default_seq_classifier_config,
                                                                             'default_full': default_full_config, },
                                                      pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                      node=NLP_FEATURE_NODES.nodes[
                                                          A.ALBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                      description='AlbertForSequenceClassification can load ALBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                      provider=ComponentBackends.open_source,
                                                      license=Licenses.open_source,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      jsl_anno_class_id=A.ALBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                          A.ALBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                      ),

        A.XLNET_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                     name=A.XLNET_FOR_SEQUENCE_CLASSIFICATION,
                                                     type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                     get_default_model=SeqXlnetClassifier.get_default_model,
                                                     get_pretrained_model=SeqXlnetClassifier.get_pretrained_model,
                                                     pdf_extractor_methods={'default': default_seq_classifier_config,
                                                                            'default_full': default_full_config, },
                                                     pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                     output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                     node=NLP_FEATURE_NODES.nodes[A.XLNET_FOR_SEQUENCE_CLASSIFICATION],
                                                     description='AlbertForSequenceClassification can load ALBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                     provider=ComponentBackends.open_source,
                                                     license=Licenses.open_source,
                                                     computation_context=ComputeContexts.spark,
                                                     output_context=ComputeContexts.spark,
                                                     jsl_anno_class_id=A.XLNET_FOR_SEQUENCE_CLASSIFICATION,
                                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                         A.XLNET_FOR_SEQUENCE_CLASSIFICATION],
                                                     ),
        A.DISTIL_BERT_FOR_ZERO_SHOT_CLASSIFICATION: partial(NluComponent,
                                                            name=A.DISTIL_BERT_FOR_ZERO_SHOT_CLASSIFICATION,
                                                            type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                            get_default_model=DistilBertZeroShotClassifier.get_default_model,
                                                            get_pretrained_model=DistilBertZeroShotClassifier.get_pretrained_model,
                                                            pdf_extractor_methods={
                                                                'default': default_seq_classifier_config,
                                                                'default_full': default_full_config, },
                                                            pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                            node=NLP_FEATURE_NODES.nodes[
                                                                A.DISTIL_BERT_FOR_ZERO_SHOT_CLASSIFICATION],
                                                            description='Distil Bert Zero Shot Classifier.',
                                                            provider=ComponentBackends.open_source,
                                                            license=Licenses.open_source,
                                                            computation_context=ComputeContexts.spark,
                                                            output_context=ComputeContexts.spark,
                                                            jsl_anno_class_id=A.DISTIL_BERT_FOR_ZERO_SHOT_CLASSIFICATION,
                                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                                A.DISTIL_BERT_FOR_ZERO_SHOT_CLASSIFICATION],
                                                            ),


        A.DEBERTA_FOR_ZERO_SHOT_CLASSIFICATION: partial(NluComponent,
                                                            name=A.DEBERTA_FOR_ZERO_SHOT_CLASSIFICATION,
                                                            type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                            get_default_model=DeBertaZeroShotClassifier.get_default_model,
                                                            get_pretrained_model=DeBertaZeroShotClassifier.get_pretrained_model,
                                                            pdf_extractor_methods={
                                                                'default': default_seq_classifier_config,
                                                                'default_full': default_full_config, },
                                                            pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                            output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                            node=NLP_FEATURE_NODES.nodes[
                                                                A.DEBERTA_FOR_ZERO_SHOT_CLASSIFICATION],
                                                            description='DeBerta Zero Shot Classifier.',
                                                            provider=ComponentBackends.open_source,
                                                            license=Licenses.open_source,
                                                            computation_context=ComputeContexts.spark,
                                                            output_context=ComputeContexts.spark,
                                                            jsl_anno_class_id=A.DEBERTA_FOR_ZERO_SHOT_CLASSIFICATION,
                                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                                A.DEBERTA_FOR_ZERO_SHOT_CLASSIFICATION],
                                                            ),







        A.BERT_FOR_ZERO_SHOT_CLASSIFICATION: partial(NluComponent,
                                                     name=A.BERT_FOR_ZERO_SHOT_CLASSIFICATION,
                                                     type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                     get_default_model=BertZeroShotClassifier.get_default_model,
                                                     get_pretrained_model=BertZeroShotClassifier.get_pretrained_model,
                                                     pdf_extractor_methods={'default': default_seq_classifier_config,
                                                                            'default_full': default_full_config, },
                                                     pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                     output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                     node=NLP_FEATURE_NODES.nodes[
                                                         A.BERT_FOR_ZERO_SHOT_CLASSIFICATION],
                                                     description='Bert Zero Shot Classifier.',
                                                     provider=ComponentBackends.open_source,
                                                     license=Licenses.open_source,
                                                     computation_context=ComputeContexts.spark,
                                                     output_context=ComputeContexts.spark,
                                                     jsl_anno_class_id=A.BERT_FOR_ZERO_SHOT_CLASSIFICATION,
                                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                         A.BERT_FOR_ZERO_SHOT_CLASSIFICATION],
                                                     ),
        A.BART_FOR_ZERO_SHOT_CLASSIFICATION: partial(NluComponent,
                                                     name=A.BART_FOR_ZERO_SHOT_CLASSIFICATION,
                                                     type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                     get_default_model=BartZeroShotClassifier.get_default_model,
                                                     get_pretrained_model=BartZeroShotClassifier.get_pretrained_model,
                                                     pdf_extractor_methods={'default': default_seq_classifier_config,
                                                                            'default_full': default_full_config, },
                                                     pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                     output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                     node=NLP_FEATURE_NODES.nodes[
                                                         A.BART_FOR_ZERO_SHOT_CLASSIFICATION],
                                                     description='Bart Zero Shot Classifier.',
                                                     provider=ComponentBackends.open_source,
                                                     license=Licenses.open_source,
                                                     computation_context=ComputeContexts.spark,
                                                     output_context=ComputeContexts.spark,
                                                     jsl_anno_class_id=A.BART_FOR_ZERO_SHOT_CLASSIFICATION,
                                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                         A.BART_FOR_ZERO_SHOT_CLASSIFICATION],
                                                     ),
        A.GPT2: partial(NluComponent,
                        name=A.GPT2,
                        type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                        get_default_model=GPT2.get_default_model,
                        get_pretrained_model=GPT2.get_pretrained_model,
                        pdf_extractor_methods={'default': default_gpt2_config, 'default_full': default_full_config, },
                        pdf_col_name_substitutor=substitute_gpt2_cols,  # TIODO TESt
                        output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                        node=NLP_FEATURE_NODES.nodes[A.GPT2],
                        description='AlbertForSequenceClassification can load ALBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                        provider=ComponentBackends.open_source,
                        license=Licenses.open_source,
                        computation_context=ComputeContexts.spark,
                        output_context=ComputeContexts.spark,
                        jsl_anno_class_id=A.GPT2,
                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.GPT2],
                        ),

        A.OPENAI_COMPLETION: partial(NluComponent,
                        name=A.OPENAI_COMPLETION,
                        type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                        get_default_model=OpenaiCompletion.get_default_model,
                        get_pretrained_model=OpenaiCompletion.get_pretrained_model,
                        pdf_extractor_methods={'default': default_gpt2_config, 'default_full': default_full_config, },
                        pdf_col_name_substitutor=substitute_gpt2_cols,  # TIODO TESt
                        output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                        node=NLP_FEATURE_NODES.nodes[A.OPENAI_COMPLETION],
                        description='',
                        provider=ComponentBackends.open_source,
                        license=Licenses.open_source,
                        computation_context=ComputeContexts.spark,
                        output_context=ComputeContexts.spark,
                        jsl_anno_class_id=A.OPENAI_COMPLETION,
                        jsl_anno_py_class=ACR.JSL_anno2_py_class[A.OPENAI_COMPLETION],
                        ),

        A.WORD_2_VEC: partial(NluComponent,  # TOOD
                              name=A.WORD_2_VEC,
                              type=T.TOKEN_EMBEDDING,
                              get_default_model=Word2Vec.get_default_model,
                              get_pretrained_model=Word2Vec.get_pretrained_model,
                              get_trainable_model=Word2Vec.get_trainable_model,
                              pdf_extractor_methods={'default': default_word_embedding_config,
                                                     'default_full': default_full_config, },
                              pdf_col_name_substitutor=substitute_word_embed_cols,  # TODO?
                              output_level=L.TOKEN,
                              node=NLP_FEATURE_NODES.nodes[A.WORD_2_VEC],
                              description='We use Word2Vec implemented in Spark ML. It uses skip-gram model_anno_obj in our implementation and a hierarchical softmax method to train the model_anno_obj. The variable names in the implementation match the original C implementation.',
                              provider=ComponentBackends.open_source,
                              license=Licenses.open_source,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=A.WORD_2_VEC,
                              jsl_anno_py_class=ACR.JSL_anno2_py_class[A.WORD_2_VEC],
                              has_storage_ref=True,
                              is_storage_ref_producer=True,
                              ),

        A.DEBERTA_WORD_EMBEDDINGS: partial(NluComponent,
                                           name=A.DEBERTA_WORD_EMBEDDINGS,
                                           type=T.TOKEN_EMBEDDING,
                                           get_default_model=Deberta.get_default_model,
                                           get_pretrained_model=Deberta.get_pretrained_model,
                                           pdf_extractor_methods={'default': default_word_embedding_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_word_embed_cols,
                                           output_level=L.TOKEN,
                                           node=NLP_FEATURE_NODES.nodes[A.DEBERTA_WORD_EMBEDDINGS],
                                           description='Token-level embeddings using DeBERTa. The DeBERTa model_anno_obj was proposed in DeBERTa: Decoding-enhanced BERT with Disentangled Attention by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. It is based on Google’s BERT model_anno_obj released in 2018 and Facebook’s RoBERTa model_anno_obj released in 2019.',
                                           provider=ComponentBackends.open_source,
                                           license=Licenses.open_source,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=A.DEBERTA_WORD_EMBEDDINGS,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.DEBERTA_WORD_EMBEDDINGS],
                                           has_storage_ref=True,
                                           is_storage_ref_producer=True,
                                           ),

        A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                       name=A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                       get_default_model=SeqDebertaClassifier.get_default_model,
                                                       get_pretrained_model=SeqDebertaClassifier.get_pretrained_model,
                                                       pdf_extractor_methods={
                                                           'default': default_seq_classifier_config,
                                                           'default_full': default_full_config, },
                                                       pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                       output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       description='The DeBERTa model_anno_obj was proposed in DeBERTa: Decoding-enhanced BERT with Disentangled Attention by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen. It is based on Google’s BERT model_anno_obj released in 2018 and Facebook’s RoBERTa model_anno_obj released in 2019. This classifier uses DeBERTa embeddingss with a linear classification head ontop.',
                                                       provider=ComponentBackends.open_source,

                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.DEBERTA_FOR_SEQUENCE_CLASSIFICATION],
                                                       ),

        A.CAMEMBERT_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                                    name=A.CAMEMBERT_FOR_QUESTION_ANSWERING,
                                                    jsl_anno_class_id=A.CAMEMBERT_FOR_QUESTION_ANSWERING,
                                                    jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                        A.CAMEMBERT_FOR_QUESTION_ANSWERING],
                                                    node=NLP_FEATURE_NODES.nodes[A.CAMEMBERT_FOR_QUESTION_ANSWERING],
                                                    get_default_model=SpanCamemBert.get_default_model,
                                                    get_pretrained_model=SpanCamemBert.get_pretrained_model,
                                                    type=T.QUESTION_SPAN_CLASSIFIER,
                                                    pdf_extractor_methods={
                                                        'default': default_span_classifier_config,
                                                        'default_full': default_full_span_classifier_config, },
                                                    pdf_col_name_substitutor=substitute_span_classifier_cols,
                                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                    description='TODO',
                                                    provider=ComponentBackends.open_source,
                                                    license=Licenses.open_source,
                                                    computation_context=ComputeContexts.spark,
                                                    output_context=ComputeContexts.spark,
                                                    ),

        A.BERT_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                               name=A.BERT_FOR_QUESTION_ANSWERING,
                                               jsl_anno_class_id=A.BERT_FOR_QUESTION_ANSWERING,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.BERT_FOR_QUESTION_ANSWERING],
                                               node=NLP_FEATURE_NODES.nodes[A.BERT_FOR_QUESTION_ANSWERING],
                                               get_default_model=SpanBertClassifier.get_default_model,
                                               get_pretrained_model=SpanBertClassifier.get_pretrained_model,
                                               type=T.QUESTION_SPAN_CLASSIFIER,
                                               pdf_extractor_methods={
                                                   'default': default_span_classifier_config,
                                                   'default_full': default_full_span_classifier_config, },
                                               pdf_col_name_substitutor=substitute_span_classifier_cols,
                                               output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                               description='TODO',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),
        A.ALBERT_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                               name=A.ALBERT_FOR_QUESTION_ANSWERING,
                                               jsl_anno_class_id=A.ALBERT_FOR_QUESTION_ANSWERING,
                                               jsl_anno_py_class=ACR.JSL_anno2_py_class[A.ALBERT_FOR_QUESTION_ANSWERING],
                                               node=NLP_FEATURE_NODES.nodes[A.ALBERT_FOR_QUESTION_ANSWERING],
                                               get_default_model=SpanAlbertClassifier.get_default_model,
                                               get_pretrained_model=SpanAlbertClassifier.get_pretrained_model,
                                               type=T.QUESTION_SPAN_CLASSIFIER,
                                               pdf_extractor_methods={
                                                   'default': default_span_classifier_config,
                                                   'default_full': default_full_span_classifier_config, },
                                               pdf_col_name_substitutor=substitute_span_classifier_cols,
                                               output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                               description='TODO',
                                               provider=ComponentBackends.open_source,
                                               license=Licenses.open_source,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               ),

        A.DE_BERTA_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                                   name=A.DE_BERTA_FOR_QUESTION_ANSWERING,
                                                   jsl_anno_class_id=A.DE_BERTA_FOR_QUESTION_ANSWERING,
                                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                       A.DE_BERTA_FOR_QUESTION_ANSWERING],
                                                   node=NLP_FEATURE_NODES.nodes[A.DE_BERTA_FOR_QUESTION_ANSWERING],
                                                   get_default_model=SpanDeBertaClassifier.get_default_model,
                                                   get_pretrained_model=SpanDeBertaClassifier.get_pretrained_model,
                                                   type=T.QUESTION_SPAN_CLASSIFIER,
                                                   pdf_extractor_methods={
                                                       'default': default_span_classifier_config,
                                                       'default_full': default_full_span_classifier_config, },
                                                   pdf_col_name_substitutor=substitute_span_classifier_cols,
                                                   output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                   description='TODO',
                                                   provider=ComponentBackends.open_source,
                                                   license=Licenses.open_source,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   ),

        A.DISTIL_BERT_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                                      name=A.DISTIL_BERT_FOR_QUESTION_ANSWERING,
                                                      jsl_anno_class_id=A.DISTIL_BERT_FOR_QUESTION_ANSWERING,
                                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                          A.DISTIL_BERT_FOR_QUESTION_ANSWERING],
                                                      node=NLP_FEATURE_NODES.nodes[
                                                          A.DISTIL_BERT_FOR_QUESTION_ANSWERING],
                                                      get_default_model=SpanDistilBertClassifier.get_default_model,
                                                      get_pretrained_model=SpanDistilBertClassifier.get_pretrained_model,
                                                      type=T.QUESTION_SPAN_CLASSIFIER,
                                                      pdf_extractor_methods={
                                                          'default': default_span_classifier_config,
                                                          'default_full': default_full_span_classifier_config, },
                                                      pdf_col_name_substitutor=substitute_span_classifier_cols,
                                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                      description='TODO',
                                                      provider=ComponentBackends.open_source,
                                                      license=Licenses.open_source,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      ),

        A.LONGFORMER_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                                     name=A.LONGFORMER_FOR_QUESTION_ANSWERING,
                                                     jsl_anno_class_id=A.LONGFORMER_FOR_QUESTION_ANSWERING,
                                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                         A.LONGFORMER_FOR_QUESTION_ANSWERING],
                                                     node=NLP_FEATURE_NODES.nodes[A.LONGFORMER_FOR_QUESTION_ANSWERING],
                                                     get_default_model=SpanLongFormerClassifier.get_default_model,
                                                     get_pretrained_model=SpanLongFormerClassifier.get_pretrained_model,
                                                     type=T.QUESTION_SPAN_CLASSIFIER,
                                                     pdf_extractor_methods={
                                                         'default': default_span_classifier_config,
                                                         'default_full': default_full_span_classifier_config, },
                                                     pdf_col_name_substitutor=substitute_span_classifier_cols,
                                                     output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                     description='TODO',
                                                     provider=ComponentBackends.open_source,
                                                     license=Licenses.open_source,
                                                     computation_context=ComputeContexts.spark,
                                                     output_context=ComputeContexts.spark,
                                                     ),

        A.ROBERTA_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                                  name=A.ROBERTA_FOR_QUESTION_ANSWERING,
                                                  jsl_anno_class_id=A.ROBERTA_FOR_QUESTION_ANSWERING,
                                                  jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                      A.ROBERTA_FOR_QUESTION_ANSWERING],
                                                  node=NLP_FEATURE_NODES.nodes[A.ROBERTA_FOR_QUESTION_ANSWERING],
                                                  get_default_model=SpanRobertaClassifier.get_default_model,
                                                  get_pretrained_model=SpanRobertaClassifier.get_pretrained_model,
                                                  type=T.QUESTION_SPAN_CLASSIFIER,
                                                  pdf_extractor_methods={
                                                      'default': default_span_classifier_config,
                                                      'default_full': default_full_span_classifier_config, },
                                                  pdf_col_name_substitutor=substitute_span_classifier_cols,
                                                  output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                  description='TODO',
                                                  provider=ComponentBackends.open_source,
                                                  license=Licenses.open_source,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  ),

        A.XLM_ROBERTA_FOR_QUESTION_ANSWERING: partial(NluComponent,
                                                      name=A.XLM_ROBERTA_FOR_QUESTION_ANSWERING,
                                                      jsl_anno_class_id=A.XLM_ROBERTA_FOR_QUESTION_ANSWERING,
                                                      jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                          A.XLM_ROBERTA_FOR_QUESTION_ANSWERING],
                                                      node=NLP_FEATURE_NODES.nodes[
                                                          A.XLM_ROBERTA_FOR_QUESTION_ANSWERING],
                                                      get_default_model=SpanXlmRobertaClassifier.get_default_model,
                                                      get_pretrained_model=SpanXlmRobertaClassifier.get_pretrained_model,
                                                      type=T.QUESTION_SPAN_CLASSIFIER,
                                                      pdf_extractor_methods={
                                                          'default': default_span_classifier_config,
                                                          'default_full': default_full_span_classifier_config, },
                                                      pdf_col_name_substitutor=substitute_span_classifier_cols,
                                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                      description='TODO',
                                                      provider=ComponentBackends.open_source,
                                                      license=Licenses.open_source,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      ),
        H_A.MEDICAL_QUESTION_ANSWERING: partial(NluComponent,
                                                      name=H_A.MEDICAL_QUESTION_ANSWERING,
                                                      jsl_anno_class_id= H_A.MEDICAL_QUESTION_ANSWERING,
                                                      jsl_anno_py_class= ACR.JSL_anno_HC_ref_2_py_class[
                                                          H_A.MEDICAL_QUESTION_ANSWERING],
                                                      node= NLP_HC_FEATURE_NODES.nodes[
                                                          H_A.MEDICAL_QUESTION_ANSWERING],
                                                      get_default_model= SpanMedical.get_default_model,
                                                      get_pretrained_model= SpanMedical.get_pretrained_model,
                                                      type= T.QUESTION_SPAN_CLASSIFIER,
                                                      pdf_extractor_methods={
                                                          'default': default_span_classifier_config,
                                                          'default_full': default_full_span_classifier_config, },
                                                      pdf_col_name_substitutor=substitute_hc_span_classifier_cols,
                                                      output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                      description='TODO',
                                                      provider=ComponentBackends.hc,
                                                      license=Licenses.hc,
                                                      computation_context=ComputeContexts.spark,
                                                      output_context=ComputeContexts.spark,
                                                      ),

        A.MULTI_DOCUMENT_ASSEMBLER: partial(NluComponent,
                                            name=A.MULTI_DOCUMENT_ASSEMBLER,
                                            type=T.HELPER_ANNO,
                                            get_default_model=SparkNlpMultiDocumentAssembler.get_default_model,
                                            pdf_extractor_methods={'default': default_binary_to_image_config},
                                            pdf_col_name_substitutor=substitute_recognized_text_cols,
                                            output_level=L.DOCUMENT,
                                            node=NLP_FEATURE_NODES.nodes[
                                                A.MULTI_DOCUMENT_ASSEMBLER],
                                            description='TODO',
                                            provider=ComponentBackends.open_source,

                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.MULTI_DOCUMENT_ASSEMBLER,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                A.MULTI_DOCUMENT_ASSEMBLER],
                                            ),

        A.VIT_IMAGE_CLASSIFICATION: partial(NluComponent,
                                            name=A.VIT_IMAGE_CLASSIFICATION,
                                            type=T.IMAGE_CLASSIFICATION,
                                            get_default_model=VitImageClassifier.get_default_model,
                                            get_pretrained_model=VitImageClassifier.get_pretrained_model,
                                            pdf_extractor_methods={'default': default_document_config,
                                                                   'default_full': default_full_config},
                                            pdf_col_name_substitutor=substitute_recognized_text_cols,
                                            output_level=L.DOCUMENT,
                                            node=NLP_FEATURE_NODES.nodes[
                                                A.VIT_IMAGE_CLASSIFICATION],
                                            description='TODO',
                                            provider=ComponentBackends.open_source,

                                            license=Licenses.open_source,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=A.VIT_IMAGE_CLASSIFICATION,
                                            jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                A.VIT_IMAGE_CLASSIFICATION],
                                            requires_image_format=True,
                                            is_visual_annotator=True,
                                            ),

        A.CONVNEXT_IMAGE_CLASSIFICATIONFITTED: partial(NluComponent,
                                                       name=A.CONVNEXT_IMAGE_CLASSIFICATIONFITTED,
                                                       type=T.IMAGE_CLASSIFICATION,
                                                       get_default_model=ConvNextImageClassifier.get_default_model,
                                                       get_pretrained_model=ConvNextImageClassifier.get_pretrained_model,
                                                       pdf_extractor_methods={'default': default_document_config,
                                                                              'default_full': default_full_config},
                                                       pdf_col_name_substitutor=substitute_recognized_text_cols,
                                                       output_level=L.DOCUMENT,
                                                       node=NLP_FEATURE_NODES.nodes[
                                                           A.CONVNEXT_IMAGE_CLASSIFICATION],
                                                       description='TODO',
                                                       provider=ComponentBackends.open_source,

                                                       license=Licenses.open_source,
                                                       computation_context=ComputeContexts.spark,
                                                       output_context=ComputeContexts.spark,
                                                       jsl_anno_class_id=A.CONVNEXT_IMAGE_CLASSIFICATION,
                                                       jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                           A.CONVNEXT_IMAGE_CLASSIFICATIONFITTED],
                                                       requires_image_format=True,
                                                       is_visual_annotator=True,

                                                       ),
        A.CONVNEXT_IMAGE_CLASSIFICATION: partial(NluComponent,
                                                 name=A.CONVNEXT_IMAGE_CLASSIFICATION,
                                                 type=T.IMAGE_CLASSIFICATION,
                                                 get_default_model=ConvNextImageClassifier.get_default_model,
                                                 get_pretrained_model=ConvNextImageClassifier.get_pretrained_model,
                                                 pdf_extractor_methods={'default': default_document_config,
                                                                        'default_full': default_full_config},
                                                 pdf_col_name_substitutor=substitute_recognized_text_cols,
                                                 output_level=L.DOCUMENT,
                                                 node=NLP_FEATURE_NODES.nodes[
                                                     A.CONVNEXT_IMAGE_CLASSIFICATION],
                                                 description='TODO',
                                                 provider=ComponentBackends.open_source,

                                                 license=Licenses.open_source,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=A.CONVNEXT_IMAGE_CLASSIFICATION,
                                                 jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                     A.CONVNEXT_IMAGE_CLASSIFICATION],
                                                 requires_image_format=True,
                                                 is_visual_annotator=True,

                                                 ),

        A.SWIN_IMAGE_CLASSIFICATION: partial(NluComponent,
                                             name=A.SWIN_IMAGE_CLASSIFICATION,
                                             type=T.IMAGE_CLASSIFICATION,
                                             get_default_model=SwinImageClassifier.get_default_model,
                                             get_pretrained_model=SwinImageClassifier.get_pretrained_model,
                                             pdf_extractor_methods={'default': default_document_config,
                                                                    'default_full': default_full_config},
                                             pdf_col_name_substitutor=substitute_recognized_text_cols,
                                             output_level=L.DOCUMENT,
                                             node=NLP_FEATURE_NODES.nodes[
                                                 A.SWIN_IMAGE_CLASSIFICATION],
                                             description='TODO',
                                             provider=ComponentBackends.open_source,

                                             license=Licenses.open_source,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             jsl_anno_class_id=A.SWIN_IMAGE_CLASSIFICATION,
                                             jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                 A.SWIN_IMAGE_CLASSIFICATION],
                                             requires_image_format=True,
                                             is_visual_annotator=True,
                                             ),

        A.IMAGE_ASSEMBLER: partial(NluComponent,
                                   name=A.IMAGE_ASSEMBLER,
                                   type=T.HELPER_ANNO,
                                   get_default_model=SparkNlpImageAssembler.get_default_model,
                                   pdf_extractor_methods={'default': default_image_assembler_config,
                                                          'default_full': default_full_config},
                                   pdf_col_name_substitutor=substitute_img_assembler_cols,
                                   output_level=L.DOCUMENT,
                                   node=NLP_FEATURE_NODES.nodes[A.IMAGE_ASSEMBLER],
                                   description='todo',
                                   provider=ComponentBackends.open_source,
                                   license=Licenses.open_source,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=A.IMAGE_ASSEMBLER,
                                   jsl_anno_py_class=ACR.JSL_anno2_py_class[A.IMAGE_ASSEMBLER],
                                   applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', 'TIFF']
                                   ),

        ######### HEALTHCARE ##############

        H_A.ASSERTION_DL: partial(NluComponent,
                                  name=H_A.ASSERTION_DL,
                                  type=T.CHUNK_CLASSIFIER,
                                  get_default_model=AssertionDL.get_default_model,
                                  get_pretrained_model=AssertionDL.get_pretrained_model,
                                  get_trainable_model=AssertionDL.get_default_trainable_model,
                                  pdf_extractor_methods={'default': default_assertion_config,
                                                         'default_full': default_full_config, },
                                  pdf_col_name_substitutor=substitute_assertion_cols,
                                  output_level=L.CHUNK,
                                  node=NLP_HC_FEATURE_NODES.nodes[H_A.ASSERTION_DL],
                                  description='Deep Learning based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                  provider=ComponentBackends.hc,
                                  license=Licenses.hc,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=H_A.ASSERTION_DL,
                                  jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ASSERTION_DL],
                                  has_storage_ref=True,
                                  is_storage_ref_consumer=True,
                                  trainable_mirror_anno=H_A.TRAINABLE_ASSERTION_DL
                                  ),
        H_A.TRAINABLE_ASSERTION_DL: partial(NluComponent,
                                            name=H_A.TRAINABLE_ASSERTION_DL,
                                            type=T.CHUNK_CLASSIFIER,
                                            get_default_model=AssertionDL.get_default_model,
                                            get_pretrained_model=AssertionDL.get_pretrained_model,
                                            get_trainable_model=AssertionDL.get_default_trainable_model,
                                            pdf_extractor_methods={'default': default_assertion_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_assertion_cols,
                                            output_level=L.CHUNK,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_ASSERTION_DL],
                                            description='Trainable Deep Learning based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                            provider=ComponentBackends.hc,
                                            license=Licenses.hc,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.TRAINABLE_ASSERTION_DL,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.TRAINABLE_ASSERTION_DL],
                                            has_storage_ref=True,
                                            is_storage_ref_consumer=True,
                                            trainable=True,
                                            trained_mirror_anno=H_A.ASSERTION_DL),
        # H_A.ASSERTION_FILTERER: partial(NluComponent, # TODO not integrated
        #     name=H_A.ASSERTION_FILTERER,
        #     type=T.CHUNK_FILTERER,
        #     get_default_model=AssertionDL.get_default_model,
        #     get_pretrained_model=AssertionDL.get_pretrained_model,
        #     get_trainable_model=AssertionDL.get_default_trainable_model,
        #     pdf_extractor_methods={'default': default_assertion_config, 'default_full': default_full_config, },
        #     pdf_col_name_substitutor=substitute_assertion_cols,
        #     pipe_prediction_output_level=L.CHUNK,
        #     node=NLP_HC_FEATURE_NODES.ASSERTION_DL,
        #     description='Trainable Deep Learning based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
        #     provider=ComponentBackends.hc,
        #     license=Licenses.hc,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id_id=H_A.ASSERTION_FILTERER,
        #     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ASSERTION_FILTERER],
        #
        #     has_storage_ref=True,
        #     is_is_storage_ref_consumer=True,
        #     trainable=True,
        #     trained_mirror_anno=H_A.ASSERTION_FILTERER), AssertionLogReg
        H_A.ASSERTION_LOG_REG: partial(NluComponent,
                                       name=H_A.ASSERTION_LOG_REG,
                                       type=T.CHUNK_CLASSIFIER,
                                       get_default_model=AssertionLogReg.get_default_model,
                                       get_pretrained_model=AssertionLogReg.get_pretrained_model,
                                       get_trainable_model=AssertionLogReg.get_default_trainable_model,
                                       pdf_extractor_methods={'default': default_assertion_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_assertion_cols,
                                       output_level=L.CHUNK,
                                       node=NLP_HC_FEATURE_NODES.nodes[H_A.ASSERTION_LOG_REG],
                                       description='Classical ML based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                       provider=ComponentBackends.hc,
                                       license=Licenses.hc,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=H_A.ASSERTION_LOG_REG,
                                       jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ASSERTION_LOG_REG],
                                       trained_mirror_anno=H_A.TRAINABLE_ASSERTION_LOG_REG),
        H_A.TRAINABLE_ASSERTION_LOG_REG: partial(NluComponent,
                                                 name=H_A.TRAINABLE_ASSERTION_LOG_REG,
                                                 type=T.CHUNK_CLASSIFIER,
                                                 get_default_model=AssertionLogReg.get_default_model,
                                                 get_pretrained_model=AssertionLogReg.get_pretrained_model,
                                                 get_trainable_model=AssertionLogReg.get_default_trainable_model,
                                                 pdf_extractor_methods={'default': default_assertion_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_assertion_cols,
                                                 output_level=L.CHUNK,
                                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_ASSERTION_LOG_REG],
                                                 description='Classical ML based Assertion model_anno_obj that maps NER-Chunks into a pre-defined terminology.',
                                                 provider=ComponentBackends.hc,
                                                 license=Licenses.hc,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=H_A.TRAINABLE_ASSERTION_LOG_REG,
                                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                     H_A.TRAINABLE_ASSERTION_LOG_REG],
                                                 trained_mirror_anno=H_A.ASSERTION_LOG_REG),
        H_A.CHUNK_MERGE: partial(NluComponent,
                                 name=H_A.CONTEXTUAL_PARSER,
                                 type=T.CHUNK_CLASSIFIER,

                                 get_default_model=ContextualParser.get_default_model,
                                 get_trainable_model=ContextualParser.get_trainable_model,
                                 # TODO method extractr method
                                 pdf_extractor_methods={'default': default_chunk_config,
                                                        'default_full': default_full_config, },
                                 # TODO  substitor
                                 pdf_col_name_substitutor=substitute_chunk_cols,
                                 output_level=L.CHUNK,
                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.CHUNK_MERGE],
                                 description='Rule based entity extractor.',
                                 provider=ComponentBackends.hc,
                                 license=Licenses.hc,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=H_A.CHUNK_MERGE,
                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.CHUNK_MERGE], ),
        H_A.CONTEXTUAL_PARSER: partial(NluComponent,
                                       name=H_A.CONTEXTUAL_PARSER,
                                       type=T.CHUNK_CLASSIFIER,
                                       get_default_model=ContextualParser.get_default_model,
                                       get_trainable_model=ContextualParser.get_trainable_model,
                                       # TODO extractr method
                                       pdf_extractor_methods={'default': default_full_config,
                                                              'default_full': default_full_config, },
                                       # TODO  substitor
                                       pdf_col_name_substitutor=substitute_context_parser_cols,
                                       output_level=L.CHUNK,
                                       node=NLP_HC_FEATURE_NODES.nodes[H_A.CONTEXTUAL_PARSER],
                                       description='Rule based entity extractor.',
                                       provider=ComponentBackends.hc,
                                       license=Licenses.hc,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=H_A.CONTEXTUAL_PARSER,
                                       jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.CONTEXTUAL_PARSER], ),
        H_A.DE_IDENTIFICATION: partial(NluComponent,
                                       name=H_A.DE_IDENTIFICATION,
                                       type=T.CHUNK_CLASSIFIER,
                                       get_default_model=Deidentifier.get_default_model,
                                       get_pretrained_model=Deidentifier.get_pretrained_model,
                                       pdf_extractor_methods={'default': default_de_identification_config,
                                                              'default_full': default_full_config, },
                                       pdf_col_name_substitutor=substitute_de_identification_cols,
                                       output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                       node=NLP_HC_FEATURE_NODES.nodes[H_A.DE_IDENTIFICATION],
                                       description='De-Identify named entity according to various Healthcare Data Protection standards',
                                       provider=ComponentBackends.hc,
                                       license=Licenses.hc,
                                       computation_context=ComputeContexts.spark,
                                       output_context=ComputeContexts.spark,
                                       jsl_anno_class_id=H_A.DE_IDENTIFICATION,
                                       jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.DE_IDENTIFICATION],
                                       trainable_mirror_anno=H_A.TRAINABLE_DE_IDENTIFICATION

                                       ),
        H_A.TRAINABLE_DE_IDENTIFICATION: partial(NluComponent,  # TODO WIP
                                                 name=H_A.TRAINABLE_DE_IDENTIFICATION,
                                                 type=T.CHUNK_CLASSIFIER,
                                                 get_default_model=Deidentifier.get_default_model,
                                                 get_pretrained_model=Deidentifier.get_pretrained_model,
                                                 get_trainable_model=Deidentifier.get_trainable_model,
                                                 pdf_extractor_methods={'default': default_de_identification_config,
                                                                        'default_full': default_full_config, },
                                                 pdf_col_name_substitutor=substitute_de_identification_cols,
                                                 output_level=L.DOCUMENT,
                                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_DE_IDENTIFICATION],
                                                 description='De-Identify named entity according to various Healthcare Data Protection standards',
                                                 provider=ComponentBackends.hc,
                                                 license=Licenses.hc,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=H_A.TRAINABLE_DE_IDENTIFICATION,
                                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                     H_A.TRAINABLE_DE_IDENTIFICATION],
                                                 trainable=True,
                                                 trained_mirror_anno=H_A.DE_IDENTIFICATION

                                                 ),

        H_A.DRUG_NORMALIZER: partial(NluComponent,
                                     name=H_A.DRUG_NORMALIZER,
                                     type=T.CHUNK_CLASSIFIER,
                                     get_default_model=DrugNorm.get_default_model,
                                     pdf_extractor_methods={'default': default_only_result_config,
                                                            'default_full': default_full_config, },
                                     pdf_col_name_substitutor=substitute_drug_normalizer_cols,
                                     output_level=L.DOCUMENT,
                                     node=NLP_HC_FEATURE_NODES.nodes[H_A.DRUG_NORMALIZER],
                                     description='Normalizes raw clinical and crawled text which contains drug names into cleaned and standardized representation',
                                     provider=ComponentBackends.hc,
                                     license=Licenses.hc,
                                     computation_context=ComputeContexts.spark,
                                     output_context=ComputeContexts.spark,
                                     jsl_anno_class_id=H_A.DRUG_NORMALIZER,
                                     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.DRUG_NORMALIZER], ),
        # H_A.FEATURES_ASSEMBLER: partial(NluComponent, # TODO partially integrated. feature node missing
        #     name=H_A.FEATURES_ASSEMBLER,
        #     type=T.HELPER_ANNO,
        #     get_default_model=SparkNLPFeatureAssembler.get_default_model,
        #     pdf_extractor_methods={'default': default_feature_assembler_config, 'default_full': default_full_config, },
        #     # pdf_col_name_substitutor=substitute_drug_normalizer_cols, # TODO no substition
        #     pipe_prediction_output_level=L.DOCUMENT, # TODO double check output level?
        #     node=NLP_HC_FEATURE_NODES.FEATURES_ASSEMBLER,
        #     description='Aggregated features from various annotators into one column for training generic classifiers',
        #     provider=ComponentBackends.hc,
        #     license=Licenses.hc,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id_id=H_A.FEATURES_ASSEMBLER,
        #     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.FEATURES_ASSEMBLER],
        #
        H_A.GENERIC_CLASSIFIER: partial(NluComponent,
                                        name=H_A.GENERIC_CLASSIFIER,
                                        type=T.DOCUMENT_CLASSIFIER,
                                        get_default_model=GenericClassifier.get_default_model,
                                        get_trainable_model=GenericClassifier.get_default_model,
                                        get_pretrained_model=GenericClassifier.get_default_model,
                                        pdf_extractor_methods={'default': default_generic_classifier_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_generic_classifier_parser_cols,
                                        output_level=L.DOCUMENT,
                                        node=NLP_HC_FEATURE_NODES.nodes[H_A.GENERIC_CLASSIFIER],
                                        description='Generic Deep Learning based tensorflow classifier',
                                        provider=ComponentBackends.hc,
                                        license=Licenses.hc,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=H_A.GENERIC_CLASSIFIER,
                                        jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.GENERIC_CLASSIFIER],
                                        trainable_mirror_anno=H_A.TRAINABLE_GENERIC_CLASSIFIER
                                        ),
        H_A.TRAINABLE_GENERIC_CLASSIFIER: partial(NluComponent,
                                                  name=H_A.TRAINABLE_GENERIC_CLASSIFIER,
                                                  type=T.DOCUMENT_CLASSIFIER,
                                                  get_default_model=GenericClassifier.get_default_model,
                                                  get_trainable_model=GenericClassifier.get_default_model,
                                                  get_pretrained_model=GenericClassifier.get_default_model,
                                                  pdf_extractor_methods={'default': default_generic_classifier_config,
                                                                         'default_full': default_full_config, },
                                                  pdf_col_name_substitutor=substitute_generic_classifier_parser_cols,
                                                  output_level=L.DOCUMENT,
                                                  node=NLP_HC_FEATURE_NODES.nodes[H_A.GENERIC_CLASSIFIER],
                                                  description='Generic Deep Learning based tensorflow classifier',
                                                  provider=ComponentBackends.hc,
                                                  license=Licenses.hc,
                                                  computation_context=ComputeContexts.spark,
                                                  output_context=ComputeContexts.spark,
                                                  jsl_anno_class_id=H_A.TRAINABLE_GENERIC_CLASSIFIER,
                                                  jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                      H_A.TRAINABLE_GENERIC_CLASSIFIER],

                                                  trained_mirror_anno=H_A.GENERIC_CLASSIFIER
                                                  ),

        H_A.MEDICAL_NER: partial(NluComponent,
                                 name=H_A.MEDICAL_NER,
                                 type=T.CHUNK_CLASSIFIER,
                                 get_default_model=NERDLHealthcare.get_default_model,
                                 get_trainable_model=NERDLHealthcare.get_default_trainable_model,
                                 get_pretrained_model=NERDLHealthcare.get_pretrained_model,
                                 pdf_extractor_methods={'default': default_ner_config,
                                                        'default_full': default_full_config, },
                                 pdf_col_name_substitutor=substitute_ner_dl_cols,
                                 output_level=L.TOKEN,
                                 node=NLP_HC_FEATURE_NODES.nodes[H_A.MEDICAL_NER],
                                 description='Deep Learning based Medical Named Entity Recognizer (NER)',
                                 provider=ComponentBackends.hc,
                                 license=Licenses.hc,
                                 computation_context=ComputeContexts.spark,
                                 output_context=ComputeContexts.spark,
                                 jsl_anno_class_id=H_A.MEDICAL_NER,
                                 jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.MEDICAL_NER],
                                 trainable_mirror_anno=H_A.TRAINABLE_MEDICAL_NER,
                                 has_storage_ref=True,
                                 is_storage_ref_consumer=True
                                 ),
        H_A.TRAINABLE_MEDICAL_NER: partial(NluComponent,
                                           name=H_A.TRAINABLE_MEDICAL_NER,
                                           type=T.CHUNK_CLASSIFIER,
                                           get_default_model=NERDLHealthcare.get_default_model,
                                           get_trainable_model=NERDLHealthcare.get_default_model,
                                           get_pretrained_model=NERDLHealthcare.get_default_model,
                                           pdf_extractor_methods={'default': default_ner_config,
                                                                  'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_ner_dl_cols,
                                           output_level=L.TOKEN,
                                           node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_MEDICAL_NER],
                                           description='Trainable Deep Learning based Medical Named Entity Recognizer (NER)',
                                           provider=ComponentBackends.hc,
                                           license=Licenses.hc,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           jsl_anno_class_id=H_A.TRAINABLE_MEDICAL_NER,
                                           jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.TRAINABLE_MEDICAL_NER],
                                           trained_mirror_anno=H_A.TRAINABLE_MEDICAL_NER,
                                           has_storage_ref=True,
                                           is_storage_ref_consumer=True
                                           ),

        H_A.NER_CONVERTER_INTERNAL: partial(NluComponent,
                                            name=H_A.NER_CONVERTER_INTERNAL,
                                            type=T.HELPER_ANNO,
                                            get_default_model=NerToChunkConverterLicensed.get_default_model,
                                            pdf_extractor_methods={'default': default_NER_converter_licensed_config,
                                                                   'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_ner_internal_converter_cols,
                                            output_level=L.CHUNK,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.NER_CONVERTER_INTERNAL],
                                            description='Convert NER-IOB tokens into concatenated strings (aka chunks)',
                                            provider=ComponentBackends.hc,
                                            license=Licenses.hc,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.NER_CONVERTER_INTERNAL,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.NER_CONVERTER_INTERNAL],

                                            ),

        A.PARTIAL_NerConverterInternalModel: partial(NluComponent,
                                                     name=A.PARTIAL_NerConverterInternalModel,
                                                     jsl_anno_class_id=A.PARTIAL_NerConverterInternalModel,
                                                     jsl_anno_py_class=ACR.JSL_anno2_py_class[
                                                         A.PARTIAL_NerConverterInternalModel],
                                                     node=NLP_HC_FEATURE_NODES.nodes[H_A.NER_CONVERTER_INTERNAL],
                                                     type=T.HELPER_ANNO,
                                                     pdf_extractor_methods={
                                                         'default': default_NER_converter_licensed_config,
                                                         'default_full': default_full_config, },
                                                     pdf_col_name_substitutor=substitute_ner_internal_converter_cols,
                                                     output_level=L.CHUNK,
                                                     description='Not fully integrated',
                                                     provider=ComponentBackends.open_source,
                                                     license=Licenses.hc,
                                                     computation_context=ComputeContexts.spark,
                                                     output_context=ComputeContexts.spark,
                                                     ),

        H_A.RELATION_EXTRACTION: partial(NluComponent,
                                         name=H_A.RELATION_EXTRACTION,
                                         type=T.RELATION_CLASSIFIER,
                                         get_default_model=RelationExtraction.get_default_model,
                                         get_pretrained_model=RelationExtraction.get_pretrained_model,
                                         get_trainable_model=RelationExtraction.get_default_trainable_model,
                                         pdf_extractor_methods={
                                             'default': default_relation_extraction_positional_config,
                                             'positional': default_relation_extraction_positional_config,
                                             'default_full': default_full_config, },
                                         pdf_col_name_substitutor=substitute_relation_cols,
                                         output_level=L.RELATION,
                                         node=NLP_HC_FEATURE_NODES.nodes[H_A.RELATION_EXTRACTION],
                                         description='Classical ML model_anno_obj for predicting relation ship between entity pairs',
                                         provider=ComponentBackends.hc,
                                         license=Licenses.hc,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=H_A.RELATION_EXTRACTION,
                                         jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.RELATION_EXTRACTION],
                                         trainable_mirror_anno=H_A.TRAINABLE_RELATION_EXTRACTION,
                                         has_storage_ref=True,
                                         is_storage_ref_consumer=True

                                         ),

        A.PARTIAL_PosologyREModel: partial(NluComponent,
                                           name=A.PARTIAL_ChunkMergeApproach,
                                           jsl_anno_class_id=A.PARTIAL_PosologyREModel,
                                           jsl_anno_py_class=ACR.JSL_anno2_py_class[A.PARTIAL_PosologyREModel],
                                           node=NLP_FEATURE_NODES.nodes[A.PARTIALLY_IMPLEMENTED],
                                           type=T.RELATION_CLASSIFIER,
                                           pdf_extractor_methods={
                                               'default': default_relation_extraction_positional_config,
                                               # 'positional': default_relation_extraction_positional_config,
                                               'default_full': default_full_config, },
                                           pdf_col_name_substitutor=substitute_relation_cols,
                                           output_level=L.RELATION,
                                           description='Not fully integrated',
                                           provider=ComponentBackends.hc,
                                           license=Licenses.hc,
                                           computation_context=ComputeContexts.spark,
                                           output_context=ComputeContexts.spark,
                                           ),

        H_A.TRAINABLE_RELATION_EXTRACTION: partial(NluComponent,
                                                   name=H_A.TRAINABLE_RELATION_EXTRACTION,
                                                   type=T.RELATION_CLASSIFIER,
                                                   get_default_model=RelationExtraction.get_default_model,
                                                   get_pretrained_model=RelationExtraction.get_pretrained_model,
                                                   get_trainable_model=RelationExtraction.get_default_trainable_model,
                                                   pdf_extractor_methods={
                                                       'default': default_relation_extraction_positional_config,
                                                       'positional': default_relation_extraction_positional_config,
                                                       'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_relation_cols,
                                                   output_level=L.RELATION,
                                                   node=NLP_HC_FEATURE_NODES.nodes[H_A.TRAINABLE_RELATION_EXTRACTION],
                                                   description='Trainable Classical ML model_anno_obj for predicting relation ship between entity pairs',
                                                   provider=ComponentBackends.hc,
                                                   license=Licenses.hc,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=H_A.TRAINABLE_RELATION_EXTRACTION,
                                                   jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                       H_A.TRAINABLE_RELATION_EXTRACTION],
                                                   trained_mirror_anno=H_A.RELATION_EXTRACTION,
                                                   trainable=True,
                                                   has_storage_ref=True,
                                                   is_storage_ref_consumer=True
                                                   ),

        H_A.ZERO_SHOT_RELATION_EXTRACTION: partial(NluComponent,
                                                   name=H_A.ZERO_SHOT_RELATION_EXTRACTION,
                                                   type=T.RELATION_CLASSIFIER,
                                                   get_default_model=ZeroShotRelationExtractor.get_default_model,
                                                   get_pretrained_model=ZeroShotRelationExtractor.get_pretrained_model,
                                                   pdf_extractor_methods={
                                                       'default': default_relation_extraction_positional_config,
                                                       'positional': default_relation_extraction_positional_config,
                                                       'default_full': default_full_config, },
                                                   pdf_col_name_substitutor=substitute_relation_cols,
                                                   output_level=L.RELATION,
                                                   node=NLP_HC_FEATURE_NODES.nodes[H_A.ZERO_SHOT_RELATION_EXTRACTION],
                                                   description='Zero-shot relation extraction model_anno_obj that leverages BertForSequenceClassificaiton to return, based on a predefined set of relation',
                                                   provider=ComponentBackends.hc,
                                                   license=Licenses.hc,
                                                   computation_context=ComputeContexts.spark,
                                                   output_context=ComputeContexts.spark,
                                                   jsl_anno_class_id=H_A.ZERO_SHOT_RELATION_EXTRACTION,
                                                   jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                       H_A.ZERO_SHOT_RELATION_EXTRACTION],
                                                   trained_mirror_anno=H_A.RELATION_EXTRACTION,
                                                   ),
        H_A.ZERO_SHOT_NER: partial(NluComponent,
                                   name=H_A.ZERO_SHOT_NER,
                                   type=T.CHUNK_CLASSIFIER,
                                   get_default_model=ZeroShotNer.get_default_model,
                                   get_pretrained_model=ZeroShotNer.get_pretrained_model,
                                   pdf_extractor_methods={'default': default_ner_config,
                                                          'default_full': default_full_config, },
                                   pdf_col_name_substitutor=substitute_ner_dl_cols,
                                   output_level=L.TOKEN,
                                   node=NLP_HC_FEATURE_NODES.nodes[H_A.ZERO_SHOT_NER],
                                   description='Deep Learning based Zero SHot Named Entity Recognizer (NER)',
                                   provider=ComponentBackends.hc,
                                   license=Licenses.hc,
                                   computation_context=ComputeContexts.spark,
                                   output_context=ComputeContexts.spark,
                                   jsl_anno_class_id=H_A.ZERO_SHOT_NER,
                                   jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.ZERO_SHOT_NER],
                                   ),

        H_A.RELATION_EXTRACTION_DL: partial(NluComponent,
                                            name=H_A.RELATION_EXTRACTION_DL,
                                            type=T.RELATION_CLASSIFIER,
                                            get_default_model=RelationExtractionDL.get_default_model,
                                            get_pretrained_model=RelationExtractionDL.get_pretrained_model,
                                            # get_trainable_model=RelationExtractionDL.get_default_trainable_model,
                                            pdf_extractor_methods={
                                                'default': default_relation_extraction_positional_config,
                                                'positional': default_relation_extraction_positional_config,
                                                'default_full': default_full_config, },
                                            pdf_col_name_substitutor=substitute_relation_cols,
                                            output_level=L.RELATION,
                                            node=NLP_HC_FEATURE_NODES.nodes[H_A.RELATION_EXTRACTION_DL],
                                            description='Deep Learning based model_anno_obj for predicting relation ship between entity pairs',
                                            provider=ComponentBackends.hc,
                                            license=Licenses.hc,
                                            computation_context=ComputeContexts.spark,
                                            output_context=ComputeContexts.spark,
                                            jsl_anno_class_id=H_A.RELATION_EXTRACTION_DL,
                                            jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                H_A.RELATION_EXTRACTION_DL],

                                            # trainable_mirror_anno=H_A.TRAINABLE_RELATION_EXTRACTION_DL
                                            ),

        H_A.CHUNK_MAPPER_MODEL: partial(NluComponent,
                                        prefer_light_pipe=True,
                                        name=H_A.CHUNK_MAPPER_MODEL,
                                        type=T.CHUNK_MAPPER,
                                        get_default_model=ChunkMapper.get_default_model,
                                        get_pretrained_model=ChunkMapper.get_pretrained_model,
                                        # TODO EXTRACTORS!/subs
                                        pdf_extractor_methods={'default': default_chunk_mapper_config,
                                                               # 'positional': default_relation_extraction_positional_config,
                                                               'default_full': default_full_config, },
                                        pdf_col_name_substitutor=substitute_chunk_mapper_cols,
                                        output_level=L.CHUNK,
                                        node=NLP_HC_FEATURE_NODES.nodes[H_A.CHUNK_MAPPER_MODEL],
                                        description='Map entities into relation and metadata',
                                        provider=ComponentBackends.hc,
                                        license=Licenses.hc,
                                        computation_context=ComputeContexts.spark,
                                        output_context=ComputeContexts.spark,
                                        jsl_anno_class_id=H_A.CHUNK_MAPPER_MODEL,
                                        jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                            H_A.CHUNK_MAPPER_MODEL],

                                        # trainable_mirror_anno=H_A.TRAINABLE_RELATION_EXTRACTION_DL
                                        ),

        # H_A.TRAINABLE_RELATION_EXTRACTION_DL: partial(NluComponent, # DOES NOT EXIST!
        #     name=H_A.TRAINABLE_RELATION_EXTRACTION_DL,
        #     type=T.RELATION_CLASSIFIER,
        #     get_default_model=RelationExtractionDL.get_default_model,
        #     get_pretrained_model=RelationExtractionDL.get_pretrained_model,
        #     pdf_extractor_methods={ 'default': default_relation_extraction_config, 'positional': default_relation_extraction_positional_config, 'default_full'  : default_full_config, },
        #     pdf_col_name_substitutor=substitute_relation_cols,
        #     pipe_prediction_output_level=L.RELATION,
        #     node=NLP_HC_FEATURE_NODES.TRAINABLE_RELATION_EXTRACTION_DL,
        #     description='Trainable Deep Learning based model_anno_obj for predicting relation ship between entity pairs',
        #     provider=ComponentBackends.hc,
        #     license=Licenses.hc,
        #     computation_context=ComputeContexts.spark,
        #     output_context=ComputeContexts.spark,
        #     jsl_anno_class_id_id=H_A.TRAINABLE_RELATION_EXTRACTION_DL,
        #     jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[H_A.TRAINABLE_RELATION_EXTRACTION_DL],
        #
        #     trained_mirror_anno=H_A.RELATION_EXTRACTION_DL,
        #     trainable=True
        # ),
        H_A.SENTENCE_ENTITY_RESOLVER: partial(NluComponent,
                                              name=H_A.SENTENCE_ENTITY_RESOLVER,
                                              type=T.CHUNK_CLASSIFIER,
                                              get_pretrained_model=SentenceResolver.get_pretrained_model,
                                              get_trainable_model=SentenceResolver.get_default_trainable_model,
                                              pdf_extractor_methods={'default': resolver_conifg_with_metadata,
                                                                     'default_full': full_resolver_config, },
                                              pdf_col_name_substitutor=substitute_sentence_resolution_cols,
                                              output_level=L.CHUNK,
                                              node=NLP_HC_FEATURE_NODES.nodes[H_A.SENTENCE_ENTITY_RESOLVER],
                                              description='Deep Learning based entity resolver which extracts resolved entities directly from Sentence Embedding. No NER model_anno_obj required.',
                                              provider=ComponentBackends.hc,
                                              license=Licenses.hc,
                                              computation_context=ComputeContexts.spark,
                                              output_context=ComputeContexts.spark,
                                              jsl_anno_class_id=H_A.SENTENCE_ENTITY_RESOLVER,
                                              jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                  H_A.SENTENCE_ENTITY_RESOLVER],

                                              trained_mirror_anno=H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER,
                                              is_storage_ref_consumer=True,
                                              has_storage_ref=True
                                              ),
        H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER: partial(NluComponent,
                                                        name=H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER,
                                                        type=T.CHUNK_CLASSIFIER,
                                                        get_pretrained_model=SentenceResolver.get_pretrained_model,
                                                        get_trainable_model=SentenceResolver.get_default_trainable_model,
                                                        pdf_extractor_methods={
                                                            'default': default_chunk_resolution_config,
                                                            'default_full': default_full_config, },
                                                        pdf_col_name_substitutor=substitute_sentence_resolution_cols,
                                                        output_level=L.RELATION,
                                                        node=NLP_HC_FEATURE_NODES.nodes[
                                                            H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER],
                                                        description='Trainable Deep Learning based entity resolver which extracts resolved entities directly from Sentence Embedding. No NER model_anno_obj required.',
                                                        provider=ComponentBackends.hc,
                                                        license=Licenses.hc,
                                                        computation_context=ComputeContexts.spark,
                                                        output_context=ComputeContexts.spark,
                                                        jsl_anno_class_id=H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER,
                                                        jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                            H_A.TRAINABLE_SENTENCE_ENTITY_RESOLVER],
                                                        trained_mirror_anno=H_A.SENTENCE_ENTITY_RESOLVER,
                                                        is_storage_ref_consumer=True,
                                                        trainable=True,
                                                        has_storage_ref=True
                                                        ),
        H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION: partial(NluComponent,
                                                           name=H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                           type=T.TRANSFORMER_TOKEN_CLASSIFIER,
                                                           get_default_model=TokenBertHealthcare.get_default_model,
                                                           get_pretrained_model=TokenBertHealthcare.get_pretrained_model,
                                                           pdf_extractor_methods={
                                                               'default': default_token_classifier_config,
                                                               'default_full': default_full_config, },
                                                           pdf_col_name_substitutor=substitute_transformer_token_classifier_cols,
                                                           output_level=L.TOKEN,  # Handled like NER model_anno_obj
                                                           node=NLP_HC_FEATURE_NODES.nodes[
                                                               H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION],
                                                           description='MedicalBertForTokenClassification can load Bert Models with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.',
                                                           provider=ComponentBackends.open_source,
                                                           license=Licenses.hc,
                                                           computation_context=ComputeContexts.spark,
                                                           output_context=ComputeContexts.spark,
                                                           jsl_anno_class_id=H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION,
                                                           jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                               H_A.MEDICAL_BERT_FOR_TOKEN_CLASSIFICATION],

                                                           ),

        H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                              name=H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                              type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                              get_default_model=SeqBertMedicalClassifier.get_default_model,
                                                              get_pretrained_model=SeqBertMedicalClassifier.get_pretrained_model,
                                                              pdf_extractor_methods={
                                                                  'default': default_classifier_dl_config,
                                                                  'default_full': default_full_config, },
                                                              pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                              output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                              # Handled like NER model_anno_obj
                                                              node=NLP_HC_FEATURE_NODES.nodes[
                                                                  H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                              description='Custom Architecture John Snow labs developed, called MedicalBertForSequenceClassification. It can load BERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                              provider=ComponentBackends.hc,
                                                              license=Licenses.hc,
                                                              computation_context=ComputeContexts.spark,
                                                              output_context=ComputeContexts.spark,
                                                              jsl_anno_class_id=H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION,
                                                              jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                                  H_A.MEDICAL_BERT_FOR_SEQUENCE_CLASSIFICATION],
                                                              ),

        H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION: partial(NluComponent,
                                                                    name=H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                                    type=T.TRANSFORMER_SEQUENCE_CLASSIFIER,
                                                                    get_default_model=SeqDilstilBertMedicalClassifier.get_default_model,
                                                                    get_pretrained_model=SeqDilstilBertMedicalClassifier.get_pretrained_model,
                                                                    pdf_extractor_methods={
                                                                        'default': default_classifier_dl_config,
                                                                        'default_full': default_full_config, },
                                                                    pdf_col_name_substitutor=substitute_seq_bert_classifier_cols,
                                                                    output_level=L.INPUT_DEPENDENT_DOCUMENT_CLASSIFIER,
                                                                    # Handled like NER model_anno_obj
                                                                    node=NLP_HC_FEATURE_NODES.nodes[
                                                                        H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                                    description='Custom Architecture John Snow labs developed, called MedicalDistilBertForSequenceClassification. It can load DistilBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                                    provider=ComponentBackends.hc,
                                                                    license=Licenses.hc,
                                                                    computation_context=ComputeContexts.spark,
                                                                    output_context=ComputeContexts.spark,
                                                                    jsl_anno_class_id=H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION,
                                                                    jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                                        H_A.MEDICAL_DISTILBERT_FOR_SEQUENCE_CLASSIFICATION],
                                                                    ),

        H_A.FEW_SHOT_ASSERTION_CLASSIFIER: partial(NluComponent,
                                                                    name=H_A.FEW_SHOT_ASSERTION_CLASSIFIER,
                                                                    type=T.CHUNK_CLASSIFIER,
                                                                    get_default_model=FewShotAssertionClassifierModel.get_default_model,
                                                                    get_pretrained_model=FewShotAssertionClassifierModel.get_pretrained_model,
                                                                    pdf_extractor_methods={'default': default_assertion_config,
                                                                                          'default_full': default_full_config, },
                                                                    pdf_col_name_substitutor=substitute_assertion_cols,
                                                                    output_level=L.CHUNK,
                                                                    # Handled like NER model_anno_obj
                                                                    node=NLP_HC_FEATURE_NODES.nodes[
                                                                        H_A.FEW_SHOT_ASSERTION_CLASSIFIER],
                                                                    description='Custom Architecture John Snow labs developed, called MedicalDistilBertForSequenceClassification. It can load DistilBERT Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.',
                                                                    provider=ComponentBackends.hc,
                                                                    license=Licenses.hc,
                                                                    computation_context=ComputeContexts.spark,
                                                                    output_context=ComputeContexts.spark,
                                                                    jsl_anno_class_id=H_A.FEW_SHOT_ASSERTION_CLASSIFIER,
                                                                    jsl_anno_py_class=ACR.JSL_anno_HC_ref_2_py_class[
                                                                        H_A.FEW_SHOT_ASSERTION_CLASSIFIER],
                                                                    ),


        ######### OCR ##############
        O_A.IMAGE2TEXT: partial(NluComponent,
                                name=O_A.IMAGE2TEXT,
                                type=T.TEXT_RECOGNIZER,
                                get_default_model=Img2Text.get_default_model,
                                pdf_extractor_methods={'default': default_text_recognizer_config},
                                pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                                node=OCR_FEATURE_NODES.nodes[O_A.IMAGE2TEXT],
                                description='Recognize text from image files',
                                provider=ComponentBackends.ocr,
                                license=Licenses.ocr,
                                computation_context=ComputeContexts.spark,
                                output_context=ComputeContexts.spark,
                                jsl_anno_class_id=O_A.IMAGE2TEXT,
                                jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.IMAGE2TEXT],

                                applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', '.TIFF']
                                ),

        O_A.DOC2TEXT: partial(NluComponent,
                              name=O_A.DOC2TEXT,
                              type=T.TEXT_RECOGNIZER,
                              get_default_model=Doc2Text.get_default_model,
                              pdf_extractor_methods={'default': default_text_recognizer_config},
                              pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                              output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                              node=OCR_FEATURE_NODES.nodes[O_A.DOC2TEXT],
                              description='Recognize text from DOC/DOCX files',
                              provider=ComponentBackends.ocr,
                              license=Licenses.ocr,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=O_A.DOC2TEXT,
                              jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.DOC2TEXT],

                              applicable_file_types=['DOC', 'DOCX']
                              ),

        O_A.PDF2TEXT: partial(NluComponent,
                              name=O_A.PDF2TEXT,
                              type=T.TEXT_RECOGNIZER,
                              get_default_model=Pdf2Text.get_default_model,
                              pdf_extractor_methods={'default': default_text_recognizer_config},
                              pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                              output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                              node=OCR_FEATURE_NODES.nodes[O_A.PDF2TEXT],
                              description='Recognize text from PDF files',
                              provider=ComponentBackends.ocr,
                              license=Licenses.ocr,
                              computation_context=ComputeContexts.spark,
                              output_context=ComputeContexts.spark,
                              jsl_anno_class_id=O_A.PDF2TEXT,
                              jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PDF2TEXT],
                              applicable_file_types=['PDF']
                              ),

        O_A.BINARY2IMAGE: partial(NluComponent,
                                  name=O_A.BINARY2IMAGE,
                                  type=T.HELPER_ANNO,
                                  get_default_model=Binary2Image.get_default_model,
                                  pdf_extractor_methods={'default': default_binary_to_image_config},
                                  pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                  output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                                  node=OCR_FEATURE_NODES.nodes[O_A.BINARY2IMAGE],
                                  description='Convert binary image data to OCR image Spark struct representation',
                                  provider=ComponentBackends.ocr,
                                  license=Licenses.ocr,
                                  computation_context=ComputeContexts.spark,
                                  output_context=ComputeContexts.spark,
                                  jsl_anno_class_id=O_A.BINARY2IMAGE,
                                  jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.BINARY2IMAGE],
                                  applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', 'TIFF']

                                  ),

        O_A.PDF2TEXT_TABLE: partial(NluComponent,
                                    name=O_A.PDF2TEXT_TABLE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=PDF2TextTable.get_default_model,
                                    pdf_extractor_methods={'default': default_binary_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.PDF2TEXT_TABLE],
                                    description='Extract Tables from PDFs with have highlightable text',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.PDF2TEXT_TABLE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PDF2TEXT_TABLE],
                                    applicable_file_types=['PDF']

                                    ),

        O_A.PDF2IMAGE: partial(NluComponent,
                                    name=O_A.PDF2IMAGE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=PDF2Image.get_default_model,
                                    pdf_extractor_methods={'default': default_pdf_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.IMAGE,
                                    node=OCR_FEATURE_NODES.nodes[O_A.PDF2IMAGE],
                                    description='Extract Tables from PDFs with have highlightable text',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.PDF2IMAGE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PDF2IMAGE],
                                    applicable_file_types=['JPEG', 'PNG', 'PDF']
                                    ),

        O_A.IMAGE2PDF: partial(NluComponent,
                               name=O_A.IMAGE2PDF,
                               type=T.TABLE_RECOGNIZER,
                               get_default_model=Image2PDF.get_default_model,
                               pdf_extractor_methods={'default': default_pdf_to_image_config},  # TODO EXtractor
                               pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                               output_level=L.IMAGE,
                               node=OCR_FEATURE_NODES.nodes[O_A.IMAGE2PDF],
                               description='Extract Tables from PDFs with have highlightable text',
                               provider=ComponentBackends.ocr,
                               license=Licenses.ocr,
                               computation_context=ComputeContexts.spark,
                               output_context=ComputeContexts.spark,
                               jsl_anno_class_id=O_A.IMAGE2PDF,
                               jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.IMAGE2PDF],
                               applicable_file_types=['JPEG', 'PNG', 'PDF']

                               ),

        O_A.PPT2TEXT_TABLE: partial(NluComponent,
                                    name=O_A.PPT2TEXT_TABLE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=PPT2TextTable.get_default_model,
                                    pdf_extractor_methods={'default': default_binary_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.PPT2TEXT_TABLE],
                                    description='Extract Tables from PPT and PPTX files',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.PPT2TEXT_TABLE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.PPT2TEXT_TABLE],
                                    applicable_file_types=['PPT', 'PPTX']
                                    ),

        O_A.DOC2TEXT_TABLE: partial(NluComponent,
                                    name=O_A.DOC2TEXT_TABLE,
                                    type=T.TABLE_RECOGNIZER,
                                    get_default_model=Doc2TextTable.get_default_model,
                                    pdf_extractor_methods={'default': default_binary_to_image_config},  # TODO EXtractor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.DOC2TEXT_TABLE],
                                    description='Extract Tables from PPT and PPTX files',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.DOC2TEXT_TABLE,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.DOC2TEXT_TABLE],
                                    applicable_file_types=['DOCX', 'DOC']
                                    ),

        O_A.IMAGE_TABLE_CELL_DETECTOR: partial(NluComponent,
                                               name=O_A.IMAGE_TABLE_CELL_DETECTOR,
                                               type=T.TEXT_RECOGNIZER,
                                               get_default_model= ImageTableCellDetector.get_default_model,
                                               pdf_extractor_methods={'default': default_text_recognizer_config},
                                               pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                               output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                                               node=OCR_FEATURE_NODES.nodes[O_A.IMAGE_TABLE_CELL_DETECTOR],
                                               description='Recognize text from image files',
                                               provider=ComponentBackends.ocr,
                                               license=Licenses.ocr,
                                               computation_context=ComputeContexts.spark,
                                               output_context=ComputeContexts.spark,
                                               jsl_anno_class_id=O_A.IMAGE_TABLE_CELL_DETECTOR,
                                               jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.IMAGE_TABLE_CELL_DETECTOR],
                                               applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', '.TIFF'],
                                               is_light_pipe_incompatible=True
                                               ),

        O_A.IMAGE_TABLE_CELL2TEXT_TABLE: partial(NluComponent,
                                                 name=O_A.IMAGE_TABLE_CELL2TEXT_TABLE,
                                                 type=T.TEXT_RECOGNIZER,
                                                 get_default_model=ImageTable2Cell2TextTable.get_default_model,
                                                 pdf_extractor_methods={'default': default_text_recognizer_config},
                                                 pdf_col_name_substitutor=substitute_recognized_text_cols,  # TODO substitor
                                                 output_level=L.DOCUMENT,  # TODO new output level IMG? Or treat as DOC?
                                                 node=OCR_FEATURE_NODES.nodes[O_A.IMAGE_TABLE_CELL2TEXT_TABLE],
                                                 description='Recognize text from image files',
                                                 provider=ComponentBackends.ocr,
                                                 license=Licenses.ocr,
                                                 computation_context=ComputeContexts.spark,
                                                 output_context=ComputeContexts.spark,
                                                 jsl_anno_class_id=O_A.IMAGE_TABLE_CELL2TEXT_TABLE,
                                                 jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.IMAGE_TABLE_CELL2TEXT_TABLE],
                                                 applicable_file_types=['JPEG', 'PNG', 'BMP', 'WBMP', 'GIF', 'JPG', '.TIFF'],
                                                 is_light_pipe_incompatible=True
                                                 ),

        O_A.IMAGE_TABLE_DETECTOR: partial(NluComponent,
                                          name=O_A.IMAGE_TABLE_DETECTOR,
                                          type=T.TABLE_RECOGNIZER,
                                          get_default_model=IMAGE_TABLE_DETECTOR.get_default_model,
                                          pdf_extractor_methods={'default': default_binary_to_image_config},
                                          pdf_col_name_substitutor=substitute_recognized_text_cols,
                                          output_level=L.DOCUMENT,
                                          node=OCR_FEATURE_NODES.nodes[O_A.IMAGE_TABLE_DETECTOR],
                                          description='Detect Tables from Images',
                                          provider=ComponentBackends.ocr,
                                          license=Licenses.ocr,
                                          computation_context=ComputeContexts.spark,
                                          output_context=ComputeContexts.spark,
                                          jsl_anno_class_id=O_A.IMAGE_TABLE_DETECTOR,
                                          jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[O_A.IMAGE_TABLE_DETECTOR],
                                          applicable_file_types=['PDF','JPEG', 'PNG'],
                                          is_light_pipe_incompatible=True
                                          ),

        O_A.IMAGE_SPLIT_REGIONS: partial(NluComponent,
                                         name=O_A.IMAGE_SPLIT_REGIONS,
                                         type=T.OCR_UTIL,
                                         get_default_model=ImageSplitRegions.get_default_model,
                                         pdf_extractor_methods={'default': default_binary_to_image_config},
                                         pdf_col_name_substitutor=substitute_recognized_text_cols,
                                         output_level=L.DOCUMENT,
                                         node=OCR_FEATURE_NODES.nodes[O_A.IMAGE_SPLIT_REGIONS],
                                         description='Convert Image to split regions',
                                         provider=ComponentBackends.ocr,
                                         license=Licenses.ocr,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=O_A.IMAGE_SPLIT_REGIONS,
                                         jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                             O_A.IMAGE_SPLIT_REGIONS],
                                         applicable_file_types=['DOCX', 'DOC', 'JPEG', 'PNG'],
                                         is_light_pipe_incompatible=True
                                         ),

        O_A.IMAGE_DRAW_REGIONS: partial(NluComponent,
                                         name=O_A.IMAGE_DRAW_REGIONS,
                                         type=T.OCR_UTIL,
                                         get_default_model=ImageDrawRegions.get_default_model,
                                         pdf_extractor_methods={'default': default_binary_to_image_config},
                                         pdf_col_name_substitutor=substitute_recognized_text_cols,
                                         output_level=L.DOCUMENT,
                                         node=OCR_FEATURE_NODES.nodes[O_A.IMAGE_DRAW_REGIONS],
                                         description='Convert Image to split regions',
                                         provider=ComponentBackends.ocr,
                                         license=Licenses.ocr,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=O_A.IMAGE_DRAW_REGIONS,
                                         jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                             O_A.IMAGE_DRAW_REGIONS],
                                         applicable_file_types=['DOCX', 'DOC', 'JPEG', 'PNG', 'PDF']
                                         ),

        O_A.VISUAL_DOCUMENT_CLASSIFIER: partial(NluComponent,
                                                name=O_A.VISUAL_DOCUMENT_CLASSIFIER,
                                                type=T.PDF_BUILDER,
                                                get_default_model=VisualDocClassifier.get_default_model,
                                                pdf_extractor_methods={'default': default_visual_classifier_config},
                                                # TODO EXtractor
                                                pdf_col_name_substitutor=substitute_document_classifier_text_cols,
                                                # TODO substitor
                                                output_level=L.DOCUMENT,
                                                node=OCR_FEATURE_NODES.nodes[O_A.VISUAL_DOCUMENT_CLASSIFIER],
                                                description='Convert text to PDF file',
                                                provider=ComponentBackends.ocr,
                                                license=Licenses.ocr,
                                                computation_context=ComputeContexts.spark,
                                                output_context=ComputeContexts.spark,
                                                jsl_anno_class_id=O_A.VISUAL_DOCUMENT_CLASSIFIER,
                                                jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                                    O_A.VISUAL_DOCUMENT_CLASSIFIER],
                                                applicable_file_types=['JPG', 'JPEG']
                                                ),

        O_A.IMAGE2HOCR: partial(NluComponent,
                                name=O_A.IMAGE2HOCR,
                                type=T.OCR_UTIL,
                                get_default_model=Image2Hocr.get_default_model,
                                # TODO EXtractor0
                                pdf_extractor_methods={'default': default_binary_to_image_config},
                                # TODO substitor
                                pdf_col_name_substitutor=substitute_recognized_text_cols,
                                output_level=L.DOCUMENT,
                                node=OCR_FEATURE_NODES.nodes[O_A.IMAGE2HOCR],
                                description='Convert text to PDF file',
                                provider=ComponentBackends.ocr,
                                license=Licenses.ocr,
                                computation_context=ComputeContexts.spark,
                                output_context=ComputeContexts.spark,
                                jsl_anno_class_id=O_A.IMAGE2HOCR,
                                jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                    O_A.IMAGE2HOCR],
                                applicable_file_types=['DOCX', 'DOC'],
                                ),

        O_A.HOCR_TOKENIZER: partial(NluComponent,
                                    name=O_A.HOCR_TOKENIZER,
                                    type=T.OCR_UTIL,
                                    get_default_model=HocrTokenizer.get_default_model,
                                    # TODO EXtractor0
                                    pdf_extractor_methods={'default': default_binary_to_image_config},
                                    # TODO substitor
                                    pdf_col_name_substitutor=substitute_recognized_text_cols,
                                    output_level=L.DOCUMENT,
                                    node=OCR_FEATURE_NODES.nodes[O_A.HOCR_TOKENIZER],
                                    description='Convert text to PDF file',
                                    provider=ComponentBackends.ocr,
                                    license=Licenses.ocr,
                                    computation_context=ComputeContexts.spark,
                                    output_context=ComputeContexts.spark,
                                    jsl_anno_class_id=O_A.HOCR_TOKENIZER,
                                    jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                        O_A.HOCR_TOKENIZER],
                                    applicable_file_types=['DOCX', 'DOC'],
                                    ),

        O_A.VISUAL_DOCUMENT_NER: partial(NluComponent,
                                         name=O_A.VISUAL_DOCUMENT_NER,
                                         type=T.PDF_BUILDER,
                                         get_default_model=VisualDocumentNer.get_default_model,
                                         pdf_extractor_methods={'default': default_visual_ner_config},
                                         # TODO EXtractor
                                         pdf_col_name_substitutor=substitute_document_ner_cols,
                                         # TODO substitor
                                         output_level=L.CHUNK,
                                         node=OCR_FEATURE_NODES.nodes[O_A.VISUAL_DOCUMENT_NER],
                                         description='Convert text to PDF file',
                                         provider=ComponentBackends.ocr,
                                         license=Licenses.ocr,
                                         computation_context=ComputeContexts.spark,
                                         output_context=ComputeContexts.spark,
                                         jsl_anno_class_id=O_A.VISUAL_DOCUMENT_NER,
                                         jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                             O_A.VISUAL_DOCUMENT_NER],
                                         applicable_file_types=['JPG', 'JPEG']
                                         ),

        O_A.FORM_RELATION_EXTRACTOR: partial(NluComponent,
                                             name=O_A.FORM_RELATION_EXTRACTOR,
                                             type=T.TEXT_RECOGNIZER,
                                             get_default_model=FormRelationExtractor.get_default_model,
                                             # TODO EXtractor0
                                             pdf_extractor_methods={'default': default_form_relation_extractor_config},
                                             # TODO substitor
                                             pdf_col_name_substitutor=substitute_form_extractor_text_cols,
                                             output_level=L.RELATION,
                                             node=OCR_FEATURE_NODES.nodes[O_A.FORM_RELATION_EXTRACTOR],
                                             description='Convert text to PDF file',
                                             provider=ComponentBackends.ocr,
                                             license=Licenses.ocr,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             jsl_anno_class_id=O_A.FORM_RELATION_EXTRACTOR,
                                             jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                                 O_A.FORM_RELATION_EXTRACTOR],
                                             applicable_file_types=['DOCX', 'DOC'],
                                             ),
        O_A.POSITION_FINDER: partial(NluComponent,
                                             name=O_A.POSITION_FINDER,
                                             type=T.OCR_UTIL,
                                             get_default_model=PositionFinder.get_default_model,
                                             # TODO EXtractor0
                                             pdf_extractor_methods={'default': default_position_finder_config},
                                             # TODO substitor
                                             pdf_col_name_substitutor=substitute_recognized_text_cols,
                                             output_level=L.RELATION,
                                             node=OCR_FEATURE_NODES.nodes[O_A.POSITION_FINDER],
                                             description='Convert text to PDF file',
                                             provider=ComponentBackends.ocr,
                                             license=Licenses.ocr,
                                             computation_context=ComputeContexts.spark,
                                             output_context=ComputeContexts.spark,
                                             jsl_anno_class_id=O_A.POSITION_FINDER,
                                             jsl_anno_py_class=ACR.JSL_anno_OCR_ref_2_py_class[
                                                 O_A.POSITION_FINDER],
                                             applicable_file_types=['DOCX', 'DOC'],
                                             ),

    }
